framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,window_size,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,float16,0,5.250864028930664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,fp8,0,5.2867787679036455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,128,1,fp8,fp8,0,4.707263946533203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,float16,0,32.7096201578776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,float16,0,5.319696108500163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,fp8,0,5.354682922363281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,fp8,0,32.75561014811198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,128,1,fp8,fp8,0,4.788928031921387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,0,1,fp8,fp8,0,29.768875122070312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,float16,0,5.350805282592773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,float16,0,32.83922576904297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,fp8,0,5.391989390055339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,0,1,fp8,fp8,0,29.86261749267578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,128,1,fp8,fp8,0,4.828666687011719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,fp8,0,32.86980183919271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,float16,0,3.078549385070801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,float16,0,32.90584055582682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,fp8,0,3.1395947138468423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,float16,0,16.973242441813152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,128,1,fp8,fp8,0,2.8790613810221353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,0,1,fp8,fp8,0,29.88354746500651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,fp8,0,33.00720977783203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,float16,0,2.735941251118978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,fp8,0,2.755642573038737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,fp8,0,17.043802897135418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,0,1,fp8,fp8,0,15.47006352742513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,128,1,fp8,fp8,0,2.4542506535847983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,float16,0,2.740229288736979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,float16,0,16.55779774983724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,fp8,0,2.7648960749308267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,0,1,fp8,fp8,0,15.033018747965494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,128,1,fp8,fp8,0,2.47051207224528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,fp8,0,16.618175506591797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,float16,0,16.55931218465169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,float16,0,2.753477414449056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,fp8,0,2.777344067891439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,128,1,fp8,fp8,0,2.4867307345072427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,0,1,fp8,fp8,0,15.070779164632162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,fp8,0,16.55734380086263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,float16,0,1.6715787251790364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,float16,0,16.593029022216797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,fp8,0,1.7107307116190593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,128,1,fp8,fp8,0,1.5829706192016602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,float16,0,8.684213638305664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,fp8,0,16.589845021565754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,0,1,fp8,fp8,0,15.091696421305338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,float16,0,1.5120372772216797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,fp8,0,1.5221439997355144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,0,1,fp8,fp8,0,7.926501592000325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,128,1,fp8,fp8,0,1.3759946823120117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,fp8,0,8.725519816080729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,float16,0,8.480917612711588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,float16,0,1.5162506103515625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,fp8,0,1.528037389119466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,fp8,0,8.464778900146484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,0,1,fp8,fp8,0,7.704522450764974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,128,1,fp8,fp8,0,1.3831359545389812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,float16,0,8.49127451578776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,float16,0,1.5233492851257324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,fp8,0,1.5357920328776042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,0,1,fp8,fp8,0,7.71232541402181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,fp8,0,8.480031967163086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,128,1,fp8,fp8,0,1.3905706405639648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,float16,0,1.165765364964803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,float16,0,8.507216135660807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,fp8,0,1.1665653387705486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,128,1,fp8,fp8,0,1.0842133363087971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,0,1,fp8,fp8,0,7.724805196126302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,fp8,0,8.494634628295898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,float16,0,1.1673440138498943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,float16,0,4.726181348164876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,0,1,fp8,fp8,0,4.306506792704265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,fp8,0,1.1678026517232258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,128,1,fp8,fp8,0,1.0837600231170654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,float16,0,4.691184043884277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,fp8,0,4.732458750406901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,float16,0,1.1665813128153484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,fp8,0,4.7026933034261065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,fp8,0,1.1683733463287354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,0,1,fp8,fp8,0,4.303386688232422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,128,1,fp8,fp8,0,1.0841013590494792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,float16,0,1.1673706372578938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,fp8,0,4.696304003397624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,0,1,fp8,fp8,0,4.2986453374226885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,float16,0,4.707573254903157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,128,1,fp8,fp8,0,1.083173354466756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,float16,0,4.706048011779785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,fp8,0,1.167200009028117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,0,1,fp8,fp8,0,4.300570805867513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,fp8,0,4.708229382832845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,float16,0,3.9117654164632163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,128,1,fp8,fp8,0,3.491135915120443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,fp8,0,3.936767896016439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,float16,0,3.9265012741088867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,float16,0,19.229925791422527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,fp8,0,19.261600494384766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,0,1,fp8,fp8,0,17.48369598388672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,fp8,0,3.9548638661702475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,128,1,fp8,fp8,0,3.529424031575521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,float16,0,19.250661214192707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,float16,0,3.95249080657959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,0,1,fp8,fp8,0,17.476416269938152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,fp8,0,19.33019256591797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,fp8,0,3.979647954305013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,128,1,fp8,fp8,0,3.5582399368286133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,float16,0,2.3023413022359214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,float16,0,19.32201639811198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,fp8,0,2.35315736134847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,128,1,fp8,fp8,0,2.1539999643961587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,float16,0,10.10368537902832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,fp8,0,19.394234975179035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,0,1,fp8,fp8,0,17.53168487548828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,float16,0,2.050607999165853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,fp8,0,10.129557291666666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,fp8,0,2.0660160382588706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,128,1,fp8,fp8,0,1.8432159423828125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,0,1,fp8,fp8,0,9.174266815185547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,float16,0,2.0534720420837402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,float16,0,9.747978846232096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,fp8,0,2.0723466873168945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,0,1,fp8,fp8,0,8.86625607808431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,fp8,0,9.775829315185547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,128,1,fp8,fp8,0,1.8535359700520833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,float16,0,2.065098603566488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,float16,0,9.76910400390625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,fp8,0,2.0829760233561196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,128,1,fp8,fp8,0,1.8671040534973145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,0,1,fp8,fp8,0,8.890591939290365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,fp8,0,9.795007705688477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,float16,0,1.2569013436635335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,float16,0,9.796560287475586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,fp8,0,1.28439466158549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,128,1,fp8,fp8,0,1.190351963043213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,float16,0,5.1855519612630205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,0,1,fp8,fp8,0,8.904703776041666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,fp8,0,9.810629526774088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,float16,0,1.1383626461029053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,fp8,0,1.145242691040039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,fp8,0,5.213370641072591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,128,1,fp8,fp8,0,1.0374613602956135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,float16,0,5.034666697184245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,float16,0,1.1390293439229329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,fp8,0,5.048037211100261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,0,1,fp8,fp8,0,4.591103871663411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,fp8,0,1.1500853697458904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,0,1,fp8,fp8,0,4.744144121805827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,float16,0,5.040682792663574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,float16,0,1.147488037745158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,128,1,fp8,fp8,0,1.043338696161906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,0,1,fp8,fp8,0,4.595936139424642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,fp8,0,1.1571093400319417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,128,1,fp8,fp8,0,1.049455960591634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,float16,0,5.053429285685222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,float16,0,0.8784800370534261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,fp8,0,5.068314552307129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,0,1,fp8,fp8,0,4.603871981302897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,fp8,0,0.8791946570078532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,fp8,0,5.044837315877278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,128,1,fp8,fp8,0,0.8176639874776205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,float16,0,2.880986531575521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,float16,0,0.8794559637705485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,fp8,0,2.888186772664388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,float16,0,2.8669811884562173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,128,1,fp8,fp8,0,0.817909320195516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,0,1,fp8,fp8,0,2.6316213607788086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,0,1,fp8,fp8,0,2.629711945851644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,float16,0,0.8798399766286215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,fp8,0,0.879466692606608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,fp8,0,0.8799359798431396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,128,1,fp8,fp8,0,0.8184213638305664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,fp8,0,2.8629706700642905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,float16,0,0.8794933160146078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,fp8,0,2.871648152669271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,float16,0,2.871077219645182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,fp8,0,0.8815413316090902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,float16,0,2.8692906697591147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,0,1,fp8,fp8,0,2.628554662068685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,fp8,0,2.876234690348307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,0,1,fp8,fp8,0,2.6264425913492837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,128,1,fp8,fp8,0,0.8167040348052979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,float16,0,3.240847905476888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,fp8,0,3.2684640884399414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,128,1,fp8,fp8,0,2.9023094177246094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,float16,0,3.2592639923095703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,float16,0,13.800832112630209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,0,1,fp8,fp8,0,12.507381439208984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,fp8,0,3.286106745402018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,128,1,fp8,fp8,0,2.920970598856608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,fp8,0,13.825492858886719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,float16,0,13.85208511352539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,float16,0,3.274165471394857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,fp8,0,3.3031253814697266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,128,1,fp8,fp8,0,2.947850545247396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,fp8,0,13.8646608988444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,0,1,fp8,fp8,0,12.532117207845053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,float16,0,1.9140532811482747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,float16,0,13.889349619547525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,fp8,0,1.9586666425069172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,128,1,fp8,fp8,0,1.798309326171875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,float16,0,7.287210464477539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,0,1,fp8,fp8,0,12.574544270833334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,fp8,0,13.886085510253906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,float16,0,1.7049226760864258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,fp8,0,7.336885452270508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,fp8,0,1.7221867243448894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,0,1,fp8,fp8,0,6.647594451904297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,128,1,fp8,fp8,0,1.539365291595459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,float16,0,1.7137494087219238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,float16,0,7.012682596842448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,fp8,0,1.7266772588094075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,fp8,0,7.041872024536133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,0,1,fp8,fp8,0,6.381631851196289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,128,1,fp8,fp8,0,1.5478347142537434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,float16,0,1.720901330312093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,float16,0,7.026826858520508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,fp8,0,1.7365013758341472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,0,1,fp8,fp8,0,6.39243761698405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,fp8,0,7.060784022013347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,128,1,fp8,fp8,0,1.5594773292541504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,float16,0,1.0474186738332112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,float16,0,7.047098795572917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,fp8,0,1.0721866289774578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,128,1,fp8,fp8,0,0.9963359832763672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,0,1,fp8,fp8,0,6.4122772216796875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,fp8,0,7.06987190246582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,float16,0,0.9453279972076416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,float16,0,3.762234687805176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,fp8,0,3.7914454142252603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,0,1,fp8,fp8,0,3.4578240712483725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,fp8,0,0.9561920166015625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,128,1,fp8,fp8,0,0.8658400376637777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,float16,0,3.640437444051107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,float16,0,0.9541066487630209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,fp8,0,0.9610186417897543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,fp8,0,3.652026812235514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,0,1,fp8,fp8,0,3.314629236857096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,128,1,fp8,fp8,0,0.8730719884236654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,float16,0,3.649072011311849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,float16,0,0.9593706925710043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,fp8,0,0.9652213255564371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,0,1,fp8,fp8,0,3.3213014602661133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,128,1,fp8,fp8,0,0.8772160212198893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,float16,0,3.659488042195638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,float16,0,0.7364160219828287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,fp8,0,3.659359931945801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,fp8,0,3.660623868306478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,fp8,0,0.7365919748942057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,128,1,fp8,fp8,0,0.6851733525594076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,float16,0,2.1276639302571616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,float16,0,0.7364532947540283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,fp8,0,2.1297760009765625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,0,1,fp8,fp8,0,1.9414933522542317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,fp8,0,0.7365972995758057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,128,1,fp8,fp8,0,0.6848959922790527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,float16,0,2.12609593073527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,float16,0,0.7367786566416422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,0,1,fp8,fp8,0,1.939578692118327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,fp8,0,0.7376373608907064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,float16,0,2.1205172538757324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,128,1,fp8,fp8,0,0.6850293477376302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,fp8,0,2.1159626642862954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,fp8,0,2.118778705596924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,0,1,fp8,fp8,0,3.3309974670410156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,fp8,0,0.7373812993367513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,float16,0,2.1239733695983887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,0,1,fp8,fp8,0,1.9384907086690266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,float16,0,0.7366666793823242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,fp8,0,2.119258721669515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,0,1,fp8,fp8,0,1.9392800331115723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,128,1,fp8,fp8,0,0.6844693024953207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,float16,0,5.0981705983479815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,128,1,fp8,fp8,0,4.557514508565267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,fp8,0,5.122394561767578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,float16,0,5.153541247049968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,float16,0,18.39786656697591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,0,1,fp8,fp8,0,16.676340738932293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,fp8,0,18.45442072550456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,fp8,0,5.201098759969075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,128,1,fp8,fp8,0,4.645413398742676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,float16,0,18.495525360107422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,float16,0,5.193754514058431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,fp8,0,5.229237238566081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,0,1,fp8,fp8,0,16.761744181315105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,fp8,0,18.556084950764973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,128,1,fp8,fp8,0,4.683712005615234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,float16,0,2.939354578653971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,float16,0,18.56233088175456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,fp8,0,2.993845303853353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,128,1,fp8,fp8,0,2.740362803141276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,float16,0,9.698085149129232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,0,1,fp8,fp8,0,16.826148986816406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,fp8,0,18.597503662109375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,float16,0,2.5889546076456704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,fp8,0,2.611759980519613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,fp8,0,9.751045227050781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,0,1,fp8,fp8,0,8.836389541625977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,128,1,fp8,fp8,0,2.320634682973226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,float16,0,9.26528549194336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,float16,0,2.599402745564779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,fp8,0,2.6202826499938965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,0,1,fp8,fp8,0,8.399173100789389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,fp8,0,9.289301554361979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,128,1,fp8,fp8,0,2.3331947326660156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,float16,0,9.293269475301107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,float16,0,2.615455945332845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,fp8,0,2.6378773053487143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,fp8,0,9.315120061238607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,0,1,fp8,fp8,0,8.41540273030599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,128,1,fp8,fp8,0,2.3550987243652344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,float16,0,1.5314292907714844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,float16,0,9.335007985432943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,fp8,0,1.5660212834676106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,fp8,0,9.347066879272461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,float16,0,4.936911900838216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,0,1,fp8,fp8,0,8.43501345316569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,float16,0,1.3650347391764324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,128,1,fp8,fp8,0,1.4412585894266765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,0,1,fp8,fp8,0,4.510538736979167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,fp8,0,1.3789760271708171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,128,1,fp8,fp8,0,1.2354880174001057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,float16,0,4.725599924723308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,fp8,0,4.973082542419434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,float16,0,1.3710880279541016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,fp8,0,4.7477067311604815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,fp8,0,1.3823359807332356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,0,1,fp8,fp8,0,4.299578666687012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,128,1,fp8,fp8,0,1.2429227034250896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,float16,0,4.747322718302409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,float16,0,1.3792479832967122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,0,1,fp8,fp8,0,4.3072052001953125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,128,1,fp8,fp8,0,1.252351999282837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,float16,0,4.7593332926432295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,fp8,0,1.3922719955444336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,float16,0,0.8435359795888265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,fp8,0,4.770751953125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,fp8,0,4.756138801574707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,0,1,fp8,fp8,0,4.316501299540202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,float16,0,2.573813279469808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,128,1,fp8,fp8,0,0.8014933268229166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,float16,0,0.7626612981160482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,fp8,0,2.5921173095703125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,0,1,fp8,fp8,0,2.3630773226420083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,fp8,0,0.7687786420186361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,128,1,fp8,fp8,0,0.6973333358764648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,float16,0,2.471754709879557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,float16,0,0.7676426569620768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,fp8,0,2.477663993835449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,fp8,0,0.8578346570332845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,fp8,0,0.7714400291442871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,128,1,fp8,fp8,0,0.7032372951507568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,float16,0,2.4803946812947593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,float16,0,0.7688586711883545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,fp8,0,2.48581870396932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,0,1,fp8,fp8,0,2.261125405629476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,fp8,0,0.7747946580251058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,128,1,fp8,fp8,0,0.705567995707194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,float16,0,2.4854453404744468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,0,1,fp8,fp8,0,2.25600528717041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,float16,0,0.5931413173675537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,fp8,0,2.493722597757975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,fp8,0,0.59279998143514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,float16,0,1.4802506764729817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,fp8,0,1.4839733441670735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,0,1,fp8,fp8,0,1.3566986719767253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,float16,0,0.5932266712188721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,0,1,fp8,fp8,0,2.264026641845703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,fp8,0,0.5939733187357584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,float16,0,1.47379732131958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,128,1,fp8,fp8,0,0.5520000060399374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,float16,0,0.5930080016454061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,0,1,fp8,fp8,0,1.3550559679667156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,fp8,0,0.5937759876251221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,float16,0,1.4772960344950359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,128,1,fp8,fp8,0,0.5525813500086466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,fp8,0,1.475711981455485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,0,1,fp8,fp8,0,1.3535092671712239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,float16,0,0.5927146673202515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,fp8,0,0.5935946702957153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,float16,0,1.4767999649047852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,128,1,fp8,fp8,0,0.5516213178634644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,fp8,0,1.4784000714619954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,fp8,0,1.4763092994689941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,0,1,fp8,fp8,0,1.3524799346923828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,128,1,fp8,fp8,0,0.5519786675771078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,float16,0,3.777813275655111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,fp8,0,3.803914705912272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,128,1,fp8,fp8,0,3.379418690999349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,float16,0,3.7975571950276694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,float16,0,11.091103871663412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,fp8,0,11.120372772216797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,fp8,0,3.820570627848307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,0,1,fp8,fp8,0,10.026538848876953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,float16,0,11.144357045491537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,128,1,fp8,fp8,0,3.417327880859375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,float16,0,3.821605364481608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,fp8,0,11.160469055175781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,0,1,fp8,fp8,0,10.068031946818033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,fp8,0,3.8525705337524414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,128,1,fp8,fp8,0,3.4468533198038735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,float16,0,2.1925172805786133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,float16,0,11.174293518066406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,float16,0,5.9196211496988935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,fp8,0,11.203125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,fp8,0,2.239413261413574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,0,1,fp8,fp8,0,10.116175969441732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,float16,0,1.935157299041748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,128,1,fp8,fp8,0,2.0508106549580893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,fp8,0,5.9536692301432295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,0,1,fp8,fp8,0,5.412911732991536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,fp8,0,1.9496266047159831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,128,1,fp8,fp8,0,1.7387572924296062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,float16,0,1.943738619486491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,float16,0,5.619157155354817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,fp8,0,1.9616106351216633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,fp8,0,5.628917058308919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,128,1,fp8,fp8,0,1.754426638285319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,float16,0,5.633263905843099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,0,1,fp8,fp8,0,5.083061218261719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,float16,0,1.9540054003397624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,fp8,0,5.64799435933431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,0,1,fp8,fp8,0,5.100677490234375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,fp8,0,1.9715253512064617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,128,1,fp8,fp8,0,1.7610559463500977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,float16,0,5.65060297648112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,float16,0,3.0293760299682617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,0,1,fp8,fp8,0,5.10262934366862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,fp8,0,1.1737546920776367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,128,1,fp8,fp8,0,1.0833760102589924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,float16,0,1.1489493052164714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,fp8,0,3.0954294204711914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,float16,0,1.0261120001475017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,fp8,0,1.033471981684367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,fp8,0,5.6747894287109375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,float16,0,2.882490793863932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,0,1,fp8,fp8,0,2.7744906743367515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,float16,0,1.0292320251464844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,fp8,0,2.8926719029744468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,0,1,fp8,fp8,0,2.6196319262186685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,128,1,fp8,fp8,0,0.9309226671854655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,fp8,0,1.0378560225168865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,float16,0,2.889984130859375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,float16,0,1.0343413352966309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,fp8,0,2.910341262817383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,0,1,fp8,fp8,0,2.6275787353515625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,128,1,fp8,fp8,0,0.9355839888254801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,fp8,0,1.0444107055664062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,float16,0,2.900815963745117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,float16,0,0.6347999970118204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,fp8,0,2.9128106435139975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,0,1,fp8,fp8,0,2.635632038116455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,128,1,fp8,fp8,0,0.9437546730041504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,128,1,fp8,fp8,0,0.6058933337529501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,float16,0,1.5923573176066081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,float16,0,0.5751359860102335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,fp8,0,1.6112213134765625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,0,1,fp8,fp8,0,1.4713385899861653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,fp8,0,0.5793919960657755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,float16,0,1.523680051167806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,fp8,0,1.5295573870340984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,0,1,fp8,fp8,0,1.3896959622701008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,float16,0,0.5789546569188436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,128,1,fp8,fp8,0,0.5286666552225748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,float16,0,1.5283840497334797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,128,1,fp8,fp8,0,0.531333327293396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,fp8,0,1.5314666430155437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,0,1,fp8,fp8,0,1.3960480690002441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,float16,0,0.58078400293986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,fp8,0,0.5829333464304606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,fp8,0,0.64956267674764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,fp8,0,0.5857546726862589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,float16,0,1.53654940923055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,float16,0,0.449893315633138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,fp8,0,1.538991928100586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,float16,0,0.9492106437683105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,128,1,fp8,fp8,0,0.4190186659495036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,128,1,fp8,fp8,0,0.5357226530710856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,fp8,0,0.9486186504364014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,0,1,fp8,fp8,0,0.8693013191223145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,0,1,fp8,fp8,0,1.3969119389851887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,fp8,0,0.449343999226888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,float16,0,0.9412960211435953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,128,1,fp8,fp8,0,0.41892266273498535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,fp8,0,0.9424160321553549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,0,1,fp8,fp8,0,0.867199977238973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,float16,0,0.4484959840774536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,float16,0,0.4485066731770833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,float16,0,0.9416960080464681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,fp8,0,0.44995200634002686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,fp8,0,0.9430987040201823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,0,1,fp8,fp8,0,0.8672373294830322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,float16,0,0.44862401485443115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,float16,0,0.9451680183410645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,fp8,0,0.44857601324717206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,128,1,fp8,fp8,0,0.4182399908701579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,fp8,0,0.9456853071848551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,0,1,fp8,fp8,0,0.8672746817270914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,fp8,0,0.449072003364563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,128,1,fp8,fp8,0,0.418506662050883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,float16,0,4.980586687723796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,fp8,0,5.006143887837728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,128,1,fp8,fp8,0,4.474282582600911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,float16,0,11.138020833333334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,fp8,0,11.184730529785156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,float16,0,5.05077330271403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,0,1,fp8,fp8,0,10.046106974283854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,128,1,fp8,fp8,0,4.564106623331706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,fp8,0,5.080437342325847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,fp8,0,11.242708841959635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,float16,0,11.218805948893229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,float16,0,5.081935882568359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,0,1,fp8,fp8,0,10.154725392659506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,fp8,0,5.110495885213216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,128,1,fp8,fp8,0,4.6025387446085615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,float16,0,2.8558667500813804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,float16,0,11.26971181233724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,fp8,0,2.8961172103881836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,fp8,0,11.306682586669922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,0,1,fp8,fp8,0,10.187072118123373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,float16,0,5.998453140258789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,128,1,fp8,fp8,0,2.66266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,float16,0,2.4975627263387046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,fp8,0,6.0357011159261065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,0,1,fp8,fp8,0,5.480031967163086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,fp8,0,2.5186773935953775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,float16,0,5.587888081868489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,128,1,fp8,fp8,0,2.2430240313212075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,float16,0,2.5058239301045737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,0,1,fp8,fp8,0,5.051904042561849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,fp8,0,2.527477264404297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,float16,0,5.609578450520833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,128,1,fp8,fp8,0,2.2590932846069336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,fp8,0,5.609184265136719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,float16,0,2.5276853243509927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,0,1,fp8,fp8,0,5.071642557779948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,fp8,0,5.6320374806722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,fp8,0,2.5496479670206704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,float16,0,5.6314347585042315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,float16,0,1.4594079653422039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,fp8,0,5.657381057739258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,128,1,fp8,fp8,0,2.3083465894063315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,0,1,fp8,fp8,0,5.086154619852702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,float16,0,3.0428053538004556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,128,1,fp8,fp8,0,1.3707146644592285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,float16,0,1.290181318918864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,fp8,0,3.0750932693481445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,0,1,fp8,fp8,0,2.7906080881754556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,fp8,0,1.487610658009847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,fp8,0,1.3013652960459392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,128,1,fp8,fp8,0,1.1637333234151204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,float16,0,2.860410690307617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,float16,0,1.2945280075073242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,fp8,0,2.8567946751912436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,fp8,0,1.3071680068969727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,float16,0,2.8591092427571616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,128,1,fp8,fp8,0,1.1715359687805176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,float16,0,1.301199992497762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,0,1,fp8,fp8,0,2.581834634145101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,fp8,0,2.870335896809896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,fp8,0,1.3163039684295654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,128,1,fp8,fp8,0,1.181930700937907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,float16,0,2.8674453099568686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,float16,0,0.7704906463623047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,0,1,fp8,fp8,0,2.58733336130778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,fp8,0,2.88753604888916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,fp8,0,0.7874293327331543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,float16,0,1.5732107162475586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,128,1,fp8,fp8,0,0.7297173341115316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,float16,0,0.6887679894765218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,0,1,fp8,fp8,0,1.44870392481486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,fp8,0,0.6927839914957682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,float16,0,1.47979736328125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,128,1,fp8,fp8,0,0.6257760127385458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,fp8,0,1.4867733319600422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,0,1,fp8,fp8,0,1.3432265917460124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,float16,0,0.6918986638387045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,0,1,fp8,fp8,0,2.5962506930033364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,fp8,0,0.6973919868469238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,128,1,fp8,fp8,0,0.6296639839808146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,fp8,0,1.5904107093811035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,fp8,0,1.4947679837544758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,float16,0,0.6938827037811279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,fp8,0,0.7009119987487793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,128,1,fp8,fp8,0,0.6339093446731567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,0,1,fp8,fp8,0,1.3473599751790364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,fp8,0,1.5002187093098958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,0,1,fp8,fp8,0,1.3516052563985188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,float16,0,1.4928320248921711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,fp8,0,0.4399626652399699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,float16,0,0.8430666923522949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,128,1,fp8,fp8,0,0.4107840061187744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,fp8,0,0.8533600171407064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,0,1,fp8,fp8,0,0.783402681350708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,float16,0,0.38835732142130536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,float16,0,1.4835093816121419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,float16,0,0.7965919971466064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,128,1,fp8,fp8,0,0.3582613468170166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,fp8,0,0.8007946809132894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,0,1,fp8,fp8,0,0.7292479674021403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,float16,0,0.39212799072265625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,fp8,0,0.3936266501744588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,float16,0,0.4291306734085083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,128,1,fp8,fp8,0,0.36163731416066486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,fp8,0,0.8044266700744629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,float16,0,0.8000319798787435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,0,1,fp8,fp8,0,0.7319466272989908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,float16,0,0.393669327100118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,float16,0,0.8034186363220215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,fp8,0,0.39821334679921466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,128,1,fp8,fp8,0,0.3640799919764201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,fp8,0,0.8079679807027181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,0,1,fp8,fp8,0,0.7354133129119873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,float16,0,0.3067733248074849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,fp8,0,0.3070773283640544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,fp8,0,0.3901760180791219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,128,1,fp8,fp8,0,0.285589337348938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,fp8,0,0.5277386506398519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,0,1,fp8,fp8,0,0.48522667090098065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,fp8,0,0.30404265721638996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,128,1,fp8,fp8,0,0.28363200028737384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,fp8,0,0.5220853487650553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,float16,0,0.5250773429870605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,0,1,fp8,fp8,0,0.48211201032002765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,float16,0,0.3041066726048787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,float16,0,0.3040906588236491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,float16,0,0.5199679931004842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,128,1,fp8,fp8,0,0.2836266756057739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,fp8,0,0.5209866762161255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,0,1,fp8,fp8,0,0.4806079864501953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,float16,0,0.30395734310150146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,float16,0,0.5227786699930826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,128,1,fp8,fp8,0,0.28569066524505615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,fp8,0,0.3041066726048787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,float16,0,0.5206026633580526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,0,1,fp8,fp8,0,0.4806613524754842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,fp8,0,0.3043733239173889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,fp8,0,0.5216853221257528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,float16,0,3.696613311767578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,fp8,0,3.71999454498291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,128,1,fp8,fp8,0,3.3110879262288413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,float16,0,6.973557154337565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,float16,0,3.7212851842244468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,fp8,0,6.998240152994792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,0,1,fp8,fp8,0,6.267119725545247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,float16,0,7.002176284790039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,fp8,0,3.7513599395751953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,128,1,fp8,fp8,0,3.3554986317952475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,float16,0,3.7459894816080728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,fp8,0,7.030511856079102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,0,1,fp8,fp8,0,6.3206132253011065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,fp8,0,3.7713600794474282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,float16,0,7.036074956258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,128,1,fp8,fp8,0,3.3803574244181314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,float16,0,2.1365226109822593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,fp8,0,7.077189127604167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,0,1,fp8,fp8,0,6.339962641398112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,128,1,fp8,fp8,0,1.993168036142985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,fp8,0,3.839285214742025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,fp8,0,2.1720266342163086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,0,1,fp8,fp8,0,3.4941333134969077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,float16,0,1.8715306917826335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,fp8,0,1.8891466458638508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,128,1,fp8,fp8,0,1.6815306345621746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,float16,0,3.5192747116088867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,float16,0,1.885983943939209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,fp8,0,3.5343147913614907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,0,1,fp8,fp8,0,3.1694294611612954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,float16,0,3.8090667724609375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,fp8,0,1.90227206548055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,128,1,fp8,fp8,0,1.6941280364990234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,float16,0,3.53549861907959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,fp8,0,3.548410733540853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,float16,0,1.8933119773864746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,0,1,fp8,fp8,0,3.18944517771403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,128,1,fp8,fp8,0,1.7076586087544758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,float16,0,3.5525598526000977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,float16,0,1.0996053218841553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,fp8,0,3.5724105834960938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,0,1,fp8,fp8,0,3.195136070251465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,fp8,0,1.9102880160013835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,fp8,0,1.1201866467793782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,float16,0,1.9455520311991374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,128,1,fp8,fp8,0,1.0298986434936523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,float16,0,0.971397320429484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,fp8,0,1.963578701019287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,0,1,fp8,fp8,0,1.791327953338623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,fp8,0,0.9798453648885092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,128,1,fp8,fp8,0,0.8764106432596842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,fp8,0,1.815440018971761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,float16,0,0.976581335067749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,0,1,fp8,fp8,0,1.6313865979512532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,fp8,0,0.9839306672414144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,128,1,fp8,fp8,0,0.8856639862060547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,float16,0,1.8143307367960613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,float16,0,0.9823306401570638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,fp8,0,1.8186826705932617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,fp8,0,0.9905546506245931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,float16,0,1.8199626604715984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,float16,0,1.8010133107503254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,128,1,fp8,fp8,0,0.8903520107269287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,float16,0,0.5820000171661377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,0,1,fp8,fp8,0,1.6392906506856282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,fp8,0,1.8297813733418782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,float16,0,1.0140426953633626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,128,1,fp8,fp8,0,0.551471988360087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,fp8,0,1.027728001276652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,0,1,fp8,fp8,0,1.6434399286905925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,fp8,0,0.5948479970296224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,float16,0,0.9444479942321777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,fp8,0,0.5240853230158488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,128,1,fp8,fp8,0,0.4752693176269531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,fp8,0,0.9497919877370199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,0,1,fp8,fp8,0,0.8597066402435303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,float16,0,0.9477439721425375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,fp8,0,0.5271039803822836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,0,1,fp8,fp8,0,0.9394880135854086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,128,1,fp8,fp8,0,0.4765333334604899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,float16,0,0.5229440132776896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,fp8,0,0.9527466297149658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,float16,0,0.5251893202463785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,fp8,0,0.5295466581980387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,128,1,fp8,fp8,0,0.4802560011545817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,float16,0,0.5192053318023682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,fp8,0,0.9575040340423584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,0,1,fp8,fp8,0,0.8642720381418864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,0,1,fp8,fp8,0,0.8624746799468994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,float16,0,0.32663466533025104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,fp8,0,0.3354133367538452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,128,1,fp8,fp8,0,0.3122933308283488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,float16,0,0.9529333114624023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,fp8,0,0.5587519804636637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,float16,0,0.2929439942042033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,float16,0,0.5144799947738647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,128,1,fp8,fp8,0,0.27322665850321454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,fp8,0,0.5154133240381876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,float16,0,0.550378680229187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,0,1,fp8,fp8,0,0.47417600949605304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,float16,0,0.29503466685612995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,0,1,fp8,fp8,0,0.5145599842071533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,float16,0,0.5167573293050131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,fp8,0,0.2938399910926819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,fp8,0,0.5172853469848633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,0,1,fp8,fp8,0,0.4758400122324626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,float16,0,0.29731200138727826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,float16,0,0.5193440119425455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,128,1,fp8,fp8,0,0.2772960066795349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,fp8,0,0.29747732480367023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,128,1,fp8,fp8,0,0.2756426731745402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,0,1,fp8,fp8,0,0.47861866156260174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,float16,0,0.2327786684036255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,float16,0,0.3545440038045247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,fp8,0,0.2339413364728292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,128,1,fp8,fp8,0,0.21824000279108682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,fp8,0,0.35554667313893634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,0,1,fp8,fp8,0,0.3288693428039551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,float16,0,0.2286400000254313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,float16,0,0.35045333703358966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,fp8,0,0.5376000006993612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,128,1,fp8,fp8,0,0.21793067455291748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,fp8,0,0.3532426754633586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,0,1,fp8,fp8,0,0.3251519997914632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,float16,0,0.23061867554982504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,float16,0,0.3508533239364624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,fp8,0,0.299946665763855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,128,1,fp8,fp8,0,0.2157706618309021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,fp8,0,0.3520053227742513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,float16,0,0.2321280042330424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,float16,0,0.3529866536458333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,fp8,0,0.23079466819763184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,128,1,fp8,fp8,0,0.21632534265518188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,fp8,0,0.2323253353436788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,fp8,0,0.352453351020813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,0,1,fp8,fp8,0,0.3261173367500305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,0,1,fp8,fp8,0,0.3250826597213745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,fp8,0,0.23017066717147827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,float16,0,4.943925221761067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,128,1,fp8,fp8,0,4.400256156921387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,float16,0,7.516159693400065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,fp8,0,4.969578742980957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,0,1,fp8,fp8,0,6.719333648681641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,fp8,0,7.526842753092448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,float16,0,5.094789187113444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,float16,0,7.645626703898112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,128,1,fp8,fp8,0,4.445125261942546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,fp8,0,5.08026123046875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,fp8,0,7.634250640869141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,float16,0,5.102965354919434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,0,1,fp8,fp8,0,6.756432215372722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,float16,0,7.689157485961914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,fp8,0,5.089658737182617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,128,1,fp8,fp8,0,4.488304138183594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,float16,0,2.8237441380818686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,fp8,0,7.6479841868082685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,float16,0,4.134480158487956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,0,1,fp8,fp8,0,6.808607737223308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,128,1,fp8,fp8,0,2.61737060546875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,fp8,0,2.8574241002400718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,fp8,0,4.169546763102214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,0,1,fp8,fp8,0,3.7982559204101562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,float16,0,2.4565439224243164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,fp8,0,2.4784587224324546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,float16,0,3.7457119623819985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,fp8,0,3.7605759302775064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,128,1,fp8,fp8,0,2.1954612731933594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,0,1,fp8,fp8,0,3.3598505655924478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,float16,0,2.4720800717671714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,fp8,0,2.496293385823568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,float16,0,3.765109380086263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,float16,0,2.4922186533610025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,fp8,0,3.7840534845987954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,128,1,fp8,fp8,0,2.212432066599528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,128,1,fp8,fp8,0,2.2341334025065103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,fp8,0,2.511610666910807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,float16,0,3.7848211924235025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,float16,0,1.431722640991211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,0,1,fp8,fp8,0,3.3973493576049805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,fp8,0,3.7996479670206704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,0,1,fp8,fp8,0,3.379136085510254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,float16,0,2.0949494043986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,fp8,0,1.4535199801127117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,fp8,0,2.113423983256022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,0,1,fp8,fp8,0,1.925594647725423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,fp8,0,1.2653546333312988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,float16,0,1.9039039611816406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,128,1,fp8,fp8,0,1.3311839898427327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,128,1,fp8,fp8,0,1.1249066988627117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,float16,0,1.2537493705749512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,fp8,0,1.9158026377360027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,float16,0,1.2625866731007893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,fp8,0,1.2747413317362468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,float16,0,1.9104266166687012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,128,1,fp8,fp8,0,1.1330080032348633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,0,1,fp8,fp8,0,1.7099839846293132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,fp8,0,1.9239253997802734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,fp8,0,1.2827946345011394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,float16,0,1.9214666684468586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,128,1,fp8,fp8,0,1.1436533133188884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,float16,0,1.269536018371582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,0,1,fp8,fp8,0,1.7204853693644206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,fp8,0,1.9321759541829426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,float16,0,0.7385066350301107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,float16,0,1.0762186845143635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,fp8,0,0.7544000148773193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,128,1,fp8,fp8,0,0.6923147042592367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,fp8,0,1.0895679791768391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,float16,0,0.6530293226242065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,0,1,fp8,fp8,0,0.9946933587392172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,fp8,0,0.6586666504542033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,0,1,fp8,fp8,0,1.7312426567077637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,fp8,0,0.9887200196584066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,0,1,fp8,fp8,0,0.8889386653900146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,float16,0,0.9876426855723063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,fp8,0,0.6630560159683228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,128,1,fp8,fp8,0,0.595146656036377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,float16,0,0.6573119958241781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,fp8,0,0.995573361714681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,0,1,fp8,fp8,0,0.8952000141143799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,float16,0,0.9823253154754639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,float16,0,0.6624533335367838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,fp8,0,0.6663626829783121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,float16,0,0.9934506416320801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,128,1,fp8,fp8,0,0.600330670674642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,fp8,0,0.9979893366495768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,float16,0,0.5698239803314209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,128,1,fp8,fp8,0,0.5899999936421713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,fp8,0,0.40320531527201336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,128,1,fp8,fp8,0,0.3749599854151408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,fp8,0,0.5774879852930704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,0,1,fp8,fp8,0,0.5313013394673666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,float16,0,0.3509440024693807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,float16,0,0.5215093294779459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,fp8,0,0.35345598061879474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,128,1,fp8,fp8,0,0.32265599568684894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,fp8,0,0.5242666800816854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,0,1,fp8,fp8,0,0.477130651473999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,float16,0,0.3529493411382039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,float16,0,0.5228106578191122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,128,1,fp8,fp8,0,0.3251519997914632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,float16,0,0.39561065038045246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,fp8,0,0.5277653137842814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,0,1,fp8,fp8,0,0.47833065191904706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,float16,0,0.5255039930343628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,fp8,0,0.35450132687886554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,128,1,fp8,fp8,0,0.32628266016642254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,0,1,fp8,fp8,0,0.8989600340525309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,fp8,0,0.529423991839091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,0,1,fp8,fp8,0,0.48077865441640216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,float16,0,0.22365333636601767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,float16,0,0.3163306713104248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,128,1,fp8,fp8,0,0.21410133441289267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,fp8,0,0.3574719826380412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,fp8,0,0.3213119904200236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,0,1,fp8,fp8,0,0.29781333605448407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,float16,0,0.29054399331410724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,fp8,0,0.20110400517781576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,128,1,fp8,fp8,0,0.18542399009068808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,fp8,0,0.29042667150497437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,0,1,fp8,fp8,0,0.26952532927195233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,float16,0,0.20045334100723267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,float16,0,0.29132266839345294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,float16,0,0.3559466600418091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,fp8,0,0.2306613326072693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,128,1,fp8,fp8,0,0.1877653400103251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,fp8,0,0.2918773293495178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,float16,0,0.20015466213226318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,0,1,fp8,fp8,0,0.27134400606155396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,float16,0,0.2016800045967102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,float16,0,0.292303999265035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,fp8,0,0.20356265703837076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,128,1,fp8,fp8,0,0.18963199853897095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,fp8,0,0.29333333174387616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,0,1,fp8,fp8,0,0.27346134185791016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,float16,0,0.162581334511439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,float16,0,0.21570666631062826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,fp8,0,0.161189337571462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,128,1,fp8,fp8,0,0.15190933148066202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,fp8,0,0.21387734015782675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,0,1,fp8,fp8,0,0.19795199235280356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,float16,0,0.16006933649381003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,float16,0,0.21210666497548422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,128,1,fp8,fp8,0,0.1504693329334259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,fp8,0,0.21380800008773804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,float16,0,0.1606986622015635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,float16,0,0.21242133776346842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,fp8,0,0.16056533654530844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,128,1,fp8,fp8,0,0.15004799763361612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,fp8,0,0.16019733746846518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,fp8,0,0.21174399058024088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,0,1,fp8,fp8,0,0.19556266069412231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,0,1,fp8,fp8,0,0.1964319944381714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,fp8,0,0.1607253352801005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,128,1,fp8,fp8,0,0.1492800017197927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,fp8,0,0.2116426626841227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,0,1,fp8,fp8,0,0.19655466079711914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,fp8,0,0.2016693353652954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,float16,0,0.15983999768892923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,float16,0,0.2111253341039022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,float16,0,3.662965456644694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,fp8,0,3.679744084676107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,float16,0,4.926399866739909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,128,1,fp8,fp8,0,3.26311461130778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,fp8,0,4.946042696634929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,float16,0,3.7105652491251626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,0,1,fp8,fp8,0,4.392959912618001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,fp8,0,3.7240479787190757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,float16,0,4.976986567179362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,128,1,fp8,fp8,0,3.3050400416056314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,float16,0,3.7328532536824546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,0,1,fp8,fp8,0,4.4365386962890625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,fp8,0,4.980618794759114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,128,1,fp8,fp8,0,3.329381306966146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,fp8,0,3.746901194254557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,fp8,0,5.015055974324544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,float16,0,2.115455945332845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,float16,0,5.0006561279296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,0,1,fp8,fp8,0,4.462938626607259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,float16,0,2.7656478881835938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,fp8,0,2.1429759661356607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,128,1,fp8,fp8,0,1.9554346402486165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,float16,0,1.840336004892985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,fp8,0,2.792266527811686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,float16,0,2.4772960344950357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,fp8,0,1.8561973571777344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,128,1,fp8,fp8,0,1.6441599527994792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,0,1,fp8,fp8,0,2.5382080078125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,fp8,0,2.4914719263712564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,float16,0,1.853482723236084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,fp8,0,1.8705439567565918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,float16,0,2.486186663309733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,0,1,fp8,fp8,0,2.2147520383199057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,0,1,fp8,fp8,0,2.2275360425313315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,128,1,fp8,fp8,0,1.6575946807861328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,float16,0,1.864730676015218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,float16,0,2.5036746660868325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,fp8,0,1.8797492980957031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,fp8,0,2.505296071370443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,float16,0,1.073957363764445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,0,1,fp8,fp8,0,2.241290728251139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,128,1,fp8,fp8,0,1.6728533109029133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,float16,0,1.408522605895996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,fp8,0,1.0925599733988445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,128,1,fp8,fp8,0,1.001530647277832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,fp8,0,1.424890677134196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,fp8,0,2.5194506645202637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,float16,0,0.9424266815185547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,float16,0,1.263002634048462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,fp8,0,0.9493652979532877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,fp8,0,1.2723840077718098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,0,1,fp8,fp8,0,1.2947253386179607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,float16,0,0.9485387007395426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,float16,0,1.270037333170573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,128,1,fp8,fp8,0,0.8454879919687907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,fp8,0,0.9569386641184489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,128,1,fp8,fp8,0,0.8528479735056559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,0,1,fp8,fp8,0,1.1333119869232178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,fp8,0,1.2745973269144695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,0,1,fp8,fp8,0,1.1441760063171387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,float16,0,0.9534826278686523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,fp8,0,0.9630933602650961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,fp8,0,1.28493865331014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,0,1,fp8,fp8,0,1.1498506863911946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,float16,0,0.5585386753082275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,float16,0,0.7283946673075358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,float16,0,1.2757866382598877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,fp8,0,0.5703680117925009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,128,1,fp8,fp8,0,0.5227839946746826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,fp8,0,0.7385973135630289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,0,1,fp8,fp8,0,0.6728639602661133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,float16,0,0.49274667104085285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,float16,0,0.6559679905573527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,128,1,fp8,fp8,0,0.8595733642578125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,128,1,fp8,fp8,0,0.4458986520767212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,fp8,0,0.6591200033823649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,float16,0,0.5062506596247355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,float16,0,0.659231980641683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,128,1,fp8,fp8,0,0.45003199577331543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,0,1,fp8,fp8,0,0.5950773159662882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,fp8,0,0.6793813705444336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,0,1,fp8,fp8,0,0.5981173515319824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,fp8,0,0.49701865514119464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,fp8,0,0.5037333170572916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,128,1,fp8,fp8,0,0.45286933581034344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,fp8,0,0.66811736424764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,float16,0,0.6628106832504272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,float16,0,0.2996053298314412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,float16,0,0.3882773319880168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,fp8,0,0.3067733248074849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,128,1,fp8,fp8,0,0.28383467594782513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,fp8,0,0.39643200238545734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,fp8,0,0.4998933474222819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,0,1,fp8,fp8,0,0.36394135157267254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,float16,0,0.2622293432553609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,float16,0,0.3471200068791707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,fp8,0,0.26505066951115924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,128,1,fp8,fp8,0,0.24501333634058634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,fp8,0,0.3516960144042969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,0,1,fp8,fp8,0,0.3226880033810933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,float16,0,0.49771734078725177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,float16,0,0.3512106736501058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,fp8,0,0.2672853271166484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,0,1,fp8,fp8,0,0.6006133159001669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,fp8,0,0.3543413480122884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,0,1,fp8,fp8,0,0.32602133353551227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,float16,0,0.26665600140889484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,float16,0,0.35500800609588623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,128,1,fp8,fp8,0,0.24996799230575562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,float16,0,0.26504000027974445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,fp8,0,0.3566186825434367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,0,1,fp8,fp8,0,0.3265226682027181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,128,1,fp8,fp8,0,0.24734934171040854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,float16,0,0.22010666131973267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,128,1,fp8,fp8,0,0.16522666811943054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,fp8,0,0.2243573268254598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,fp8,0,0.27012266715367633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,0,1,fp8,fp8,0,0.20966400702794394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,float16,0,0.20105600357055664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,fp8,0,0.15269866585731506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,float16,0,0.17100266615549722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,128,1,fp8,fp8,0,0.13870400190353394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,fp8,0,0.20056533813476562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,0,1,fp8,fp8,0,0.18188265959421793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,float16,0,0.15316266814867655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,float16,0,0.20031466086705527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,fp8,0,0.1524799962838491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,128,1,fp8,fp8,0,0.14060800274213156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,fp8,0,0.2011786699295044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,float16,0,0.15269333124160767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,float16,0,0.15271466970443726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,float16,0,0.20153599977493286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,fp8,0,0.1534986694653829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,128,1,fp8,fp8,0,0.14316800236701965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,fp8,0,0.20081067085266113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,0,1,fp8,fp8,0,0.18547199169794717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,float16,0,0.12298132975896199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,float16,0,0.15133333206176758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,fp8,0,0.12297067046165466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,128,1,fp8,fp8,0,0.1179253359635671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,fp8,0,0.15132799744606018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,0,1,fp8,fp8,0,0.14408533771832785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,float16,0,0.12178132931391399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,float16,0,0.15030399958292642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,128,1,fp8,fp8,0,0.11547733346621196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,fp8,0,0.15042666594187418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,0,1,fp8,fp8,0,0.14032000303268433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,fp8,0,0.17573332786560059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,float16,0,0.15043200055758157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,fp8,0,0.12220266461372375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,128,1,fp8,fp8,0,0.11546132961908977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,0,1,fp8,fp8,0,0.18311999241511026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,0,1,fp8,fp8,0,0.14029332995414734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,float16,0,0.1216159959634145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,float16,0,0.14852266510327658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,float16,0,0.12216533223787944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,fp8,0,0.12138666709264119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,128,1,fp8,fp8,0,0.11557333668073018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,fp8,0,0.15026666720708212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,0,1,fp8,fp8,0,0.14153066277503967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,fp8,0,0.12249066432317098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,fp8,0,0.15024532874425253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,float16,0,5.210757255554199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,float16,0,4.399765332539876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,128,1,fp8,fp8,0,4.082858721415202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,fp8,0,4.402160008748372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,fp8,0,5.191125233968099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,float16,0,4.514357248942058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,float16,0,5.358309427897136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,0,1,fp8,fp8,0,4.80405330657959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,fp8,0,4.501482645670573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,128,1,fp8,fp8,0,4.383557319641113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,fp8,0,5.325893402099609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,0,1,fp8,fp8,0,5.095386823018392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,float16,0,4.577237447102864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,float16,0,5.409674962361653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,fp8,0,4.578906695048015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,128,1,fp8,fp8,0,4.355781237284343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,fp8,0,5.359455744425456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,float16,0,2.543893337249756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,float16,0,2.982938766479492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,0,1,fp8,fp8,0,5.0971574783325195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,fp8,0,2.478288014729818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,128,1,fp8,fp8,0,2.367232004801432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,fp8,0,2.9081494013468423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,0,1,fp8,fp8,0,2.7355305353800454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,float16,0,2.203978697458903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,float16,0,2.613759994506836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,fp8,0,2.1959306399027505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,128,1,fp8,fp8,0,2.0344640413920083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,fp8,0,2.6119786898295083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,0,1,fp8,fp8,0,2.4144959449768066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,float16,0,2.2168906529744468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,float16,0,2.630330721537272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,fp8,0,2.216111977895101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,128,1,fp8,fp8,0,2.170618693033854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,fp8,0,2.6287946701049805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,0,1,fp8,fp8,0,2.534287929534912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,float16,0,2.639472007751465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,fp8,0,2.2107839584350586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,128,1,fp8,fp8,0,2.1516745885213218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,float16,0,2.23307736714681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,float16,0,1.2289120356241863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,fp8,0,2.6450932820638022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,0,1,fp8,fp8,0,2.527525266011556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,float16,0,1.442639986673991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,fp8,0,1.1897013187408447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,128,1,fp8,fp8,0,1.1865546703338623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,fp8,0,1.4092319806416829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,0,1,fp8,fp8,0,1.382688045501709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,float16,0,1.3182506561279297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,fp8,0,1.109503984451294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,128,1,fp8,fp8,0,1.010522683461507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,fp8,0,1.3171093463897705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,0,1,fp8,fp8,0,1.1875786781311035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,float16,0,1.1186506748199463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,float16,0,1.111680030822754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,float16,0,1.3273119926452637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,128,1,fp8,fp8,0,1.0361653168996174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,fp8,0,1.327781359354655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,0,1,fp8,fp8,0,1.2174293200174968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,float16,0,1.1195680300394695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,float16,0,1.3277013301849365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,128,1,fp8,fp8,0,1.0269280274709065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,fp8,0,1.1173386573791504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,float16,0,0.6188000043233236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,float16,0,0.7285706996917725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,fp8,0,1.1171092987060547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,128,1,fp8,fp8,0,0.5975199937820435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,fp8,0,1.327130635579427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,fp8,0,0.7145013014475504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,0,1,fp8,fp8,0,0.6932213306427002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,float16,0,0.565280000368754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,fp8,0,0.6037333408991495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,128,1,fp8,fp8,0,0.5282453298568726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,0,1,fp8,fp8,0,1.2173813184102376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,fp8,0,0.6717653274536133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,0,1,fp8,fp8,0,0.6049013137817383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,float16,0,0.5699893236160278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,float16,0,0.6760160128275553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,float16,0,0.6720266342163086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,128,1,fp8,fp8,0,0.5209920008977255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,fp8,0,0.6748320261637369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,0,1,fp8,fp8,0,0.6165440082550049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,float16,0,0.5671146710713705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,float16,0,0.6774240334828695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,fp8,0,0.5686293443044027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,128,1,fp8,fp8,0,0.5239786704381307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,fp8,0,0.5642773310343424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,fp8,0,0.6747146447499593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,float16,0,0.32200000683466595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,float16,0,0.38021334012349445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,fp8,0,0.31566399335861206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,128,1,fp8,fp8,0,0.3086559971173604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,fp8,0,0.3746933142344157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,0,1,fp8,fp8,0,0.3601333300272624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,float16,0,0.2948746681213379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,float16,0,0.35046398639678955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,fp8,0,0.2932373285293579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,128,1,fp8,fp8,0,0.2666933337847392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,fp8,0,0.34891732533772785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,0,1,fp8,fp8,0,0.3158773382504781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,0,1,fp8,fp8,0,0.618336002031962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,float16,0,0.2943893273671468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,float16,0,0.3502720197041829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,fp8,0,0.29603199164072674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,128,1,fp8,fp8,0,0.27210666735967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,fp8,0,0.3498986562093099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,0,1,fp8,fp8,0,0.3206826647122701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,float16,0,0.2959413329760234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,float16,0,0.35276798407236737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,fp8,0,0.2977013389269511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,fp8,0,0.5693759918212891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,128,1,fp8,fp8,0,0.27482134103775024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,fp8,0,0.3535199960072835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,0,1,fp8,fp8,0,0.3216053247451782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,float16,0,0.2032159964243571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,fp8,0,0.16844266653060913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,128,1,fp8,fp8,0,0.16723734140396118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,fp8,0,0.1995519995689392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,0,1,fp8,fp8,0,0.19312000274658203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,float16,0,0.1553973356882731
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,float16,0,0.1843306620915731
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,fp8,0,0.15530666708946228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,128,1,fp8,fp8,0,0.1434560020764669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,fp8,0,0.1839039921760559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,0,1,fp8,fp8,0,0.17011199394861856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,float16,0,0.15507733821868896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,float16,0,0.18405866622924805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,fp8,0,0.15519466996192932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,float16,0,0.17221333583196005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,fp8,0,0.1837600072224935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,0,1,fp8,fp8,0,0.17138133446375528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,float16,0,0.1568106710910797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,float16,0,0.1862773299217224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,128,1,fp8,fp8,0,0.1479039986928304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,fp8,0,0.18567466735839844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,0,1,fp8,fp8,0,0.17406400044759116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,float16,0,0.09675733248392741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,float16,0,0.11547733346621196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,fp8,0,0.09582933783531189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,128,1,fp8,fp8,0,0.09588799873987834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,fp8,0,0.11398399869600932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,0,1,fp8,fp8,0,0.1148533324400584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,float16,0,0.0867786705493927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,float16,0,0.1053653359413147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,fp8,0,0.08662399649620056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,fp8,0,0.15591466426849365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,0,1,fp8,fp8,0,0.09548266728719075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,float16,0,0.08834133545557658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,float16,0,0.10466667016347249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,fp8,0,0.08752533793449402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,128,1,fp8,fp8,0,0.14497066537539163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,128,1,fp8,fp8,0,0.08235733211040497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,fp8,0,0.10493333141009013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,0,1,fp8,fp8,0,0.09603733817736308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,float16,0,0.10596266388893127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,fp8,0,0.10532800356547038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,fp8,0,0.08771733442942302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,128,1,fp8,fp8,0,0.0800906668106715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,fp8,0,0.1046346624692281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,0,1,fp8,fp8,0,0.09725333253542583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,float16,0,0.0543039987484614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,float16,0,0.06620266536871593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,fp8,0,0.05442133545875549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,128,1,fp8,fp8,0,0.05230399966239929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,fp8,0,0.06646933158238728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,float16,0,0.05345066885153452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,float16,0,0.06423999865849812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,fp8,0,0.053264002005259194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,float16,0,0.08718400200208028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,fp8,0,0.06437333424886067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,0,1,fp8,fp8,0,0.05947199960549673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,float16,0,0.05421333511670431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,float16,0,0.06473066906134288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,fp8,0,0.05212800204753876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,128,1,fp8,fp8,0,0.0499893327554067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,fp8,0,0.063701331615448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,0,1,fp8,fp8,0,0.05961599946022034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,float16,0,0.053370664517084755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,float16,0,0.06436266501744588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,fp8,0,0.053557331363360085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,128,1,fp8,fp8,0,0.05023466547330221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,fp8,0,0.06414400041103363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,0,1,fp8,fp8,0,0.06018666426340739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,128,1,fp8,fp8,0,0.07951466739177704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,128,1,fp8,fp8,0,0.05017066498597463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,128,1,float16,float16,0,4.262175877888997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,0,1,float16,float16,0,4.333189328511556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,128,1,float16,fp8,0,4.259685198465983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,128,1,fp8,fp8,0,3.958127975463867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,0,1,fp8,fp8,0,4.010565439860026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,0,1,float16,fp8,0,4.322629292805989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,128,1,float16,float16,0,4.320725440979004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,0,1,float16,float16,0,4.3805545171101885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,128,1,float16,fp8,0,4.3198293050130205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,0,1,float16,fp8,0,4.367717425028483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,128,1,fp8,fp8,0,4.2749067942301435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,0,1,fp8,fp8,0,4.31548277537028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,128,1,float16,float16,0,4.429264068603516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,0,1,float16,float16,0,4.488416035970052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,128,1,float16,fp8,0,4.413296063741048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,128,1,fp8,fp8,0,4.243013381958008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,0,1,float16,fp8,0,4.486218770345052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,0,1,fp8,fp8,0,4.278106689453125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,128,1,float16,float16,0,2.4515040715535483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,0,1,float16,float16,0,2.535930633544922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,128,1,float16,fp8,0,2.396618684132894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,128,1,fp8,fp8,0,2.280949274698893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,0,1,float16,fp8,0,2.4597226778666177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,0,1,fp8,fp8,0,2.3267839749654136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,128,1,float16,float16,0,2.137722651163737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,0,1,float16,float16,0,2.171237309773763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,128,1,float16,fp8,0,2.134981314341227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,0,1,float16,fp8,0,2.1677494049072266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,0,1,fp8,fp8,0,1.9851466814676921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,128,1,float16,float16,0,2.147792021433512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,0,1,float16,float16,0,2.1891093254089355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,128,1,fp8,fp8,0,1.9670186042785645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,128,1,float16,fp8,0,2.1443039576212564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,128,1,fp8,fp8,0,2.0902613004048667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,0,1,float16,fp8,0,2.1823573112487793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,0,1,fp8,fp8,0,2.12829860051473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,128,1,float16,float16,0,2.1604320208231607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,0,1,float16,float16,0,2.189605394999186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,128,1,float16,fp8,0,2.1896212895711265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,128,1,fp8,fp8,0,2.1017120679219565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,0,1,float16,fp8,0,2.179327964782715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,128,1,float16,float16,0,1.192021369934082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,0,1,fp8,fp8,0,2.1205652554829917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,128,1,float16,fp8,0,1.1603786945343018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,128,1,fp8,fp8,0,1.1454133192698162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,0,1,float16,fp8,0,1.1863946914672852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,0,1,fp8,fp8,0,1.1605493227640789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,128,1,float16,float16,0,1.0780320167541504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,0,1,float16,float16,0,1.095088005065918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,0,1,float16,float16,0,1.2219040393829346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,128,1,float16,fp8,0,1.079914649327596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,128,1,fp8,fp8,0,0.9789120356241862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,0,1,float16,fp8,0,1.0965813000996907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,0,1,fp8,fp8,0,0.9858079751332601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,128,1,float16,float16,0,1.0868586699167888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,0,1,float16,float16,0,1.1007520357767742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,128,1,float16,fp8,0,1.0837546984354656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,128,1,fp8,fp8,0,1.0181492964426677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,0,1,float16,fp8,0,1.1010773181915283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,0,1,fp8,fp8,0,1.0481493473052979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,128,1,float16,float16,0,1.087392012278239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,0,1,float16,float16,0,1.1035892963409424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,128,1,float16,fp8,0,1.0815680027008057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,0,1,float16,fp8,0,1.1029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,128,1,float16,float16,0,0.6004853248596191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,0,1,fp8,fp8,0,1.0170453389485676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,0,1,float16,float16,0,0.6117920080820719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,128,1,fp8,fp8,0,0.5757439931233724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,0,1,float16,fp8,0,0.5986133416493734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,0,1,fp8,fp8,0,0.5873333215713501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,128,1,float16,float16,0,0.5492800076802572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,0,1,float16,float16,0,0.5597333510716757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,128,1,float16,fp8,0,0.5486133495966593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,128,1,fp8,fp8,0,0.49795734882354736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,0,1,float16,fp8,0,0.5589760144551595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,0,1,fp8,fp8,0,0.5014079809188843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,128,1,float16,float16,0,0.5522079865137736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,0,1,float16,float16,0,0.5611413319905599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,128,1,float16,fp8,0,0.5521119832992554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,128,1,fp8,fp8,0,0.5072799921035767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,0,1,float16,fp8,0,0.5610080162684122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,0,1,fp8,fp8,0,0.5127679904301962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,128,1,float16,float16,0,0.5535253286361694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,128,1,float16,fp8,0,0.5862559874852499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,128,1,float16,fp8,0,0.5518133242925009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,128,1,fp8,fp8,0,0.5070879856745402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,0,1,float16,fp8,0,0.5616159836451212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,0,1,fp8,fp8,0,0.5142666498819987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,128,1,float16,float16,0,0.3135040005048116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,0,1,float16,float16,0,0.32039467493693036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,128,1,float16,fp8,0,0.30671467383702594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,128,1,fp8,fp8,0,0.30170132716496784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,0,1,float16,fp8,0,0.3123466571172078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,0,1,fp8,fp8,0,0.30693866809209186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,128,1,float16,float16,0,0.28627200921376544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,0,1,float16,float16,0,0.29044799009958905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,128,1,float16,fp8,0,0.2854986588160197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,128,1,fp8,fp8,0,0.2595786650975545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,0,1,float16,fp8,0,0.2909440000851949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,128,1,fp8,fp8,0,1.0102880001068115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,128,1,float16,float16,0,0.2874720096588135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,0,1,float16,float16,0,0.2898133397102356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,128,1,float16,fp8,0,0.2876373330752055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,128,1,fp8,fp8,0,0.26344533761342365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,0,1,float16,fp8,0,0.2911466757456462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,0,1,fp8,fp8,0,0.26691200335820514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,128,1,float16,float16,0,0.28709866603215534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,0,1,float16,float16,0,0.29249600569407147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,128,1,float16,fp8,0,0.2873973250389099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,128,1,fp8,fp8,0,0.26595733563105267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,0,1,float16,fp8,0,0.2927839954694112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,0,1,fp8,fp8,0,0.26795732975006104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,128,1,float16,float16,0,0.16713066895802817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,0,1,float16,float16,0,0.16962132851282755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,128,1,float16,fp8,0,0.16301866372426352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,128,1,fp8,fp8,0,0.16149333119392395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,0,1,float16,fp8,0,0.16665066281954447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,0,1,fp8,fp8,0,0.16409599781036377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,0,1,float16,float16,0,0.5620266596476237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,128,1,float16,float16,0,0.15057599544525146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,0,1,float16,float16,0,0.1530133287111918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,128,1,float16,fp8,0,0.15127467115720114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,128,1,fp8,fp8,0,0.14096533258756003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,0,1,float16,fp8,0,0.15448000033696493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,0,1,fp8,fp8,0,0.14164800445238748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,128,1,float16,float16,0,0.15153066317240396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,0,1,float16,float16,0,0.15441067020098367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,128,1,float16,fp8,0,0.151829332113266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,128,1,fp8,fp8,0,0.14171199997266135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,0,1,float16,fp8,0,0.1539520025253296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,0,1,fp8,fp8,0,0.1418186624844869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,128,1,float16,float16,0,0.15152000387509665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,0,1,float16,float16,0,0.1544426679611206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,128,1,float16,fp8,0,0.15242666999499002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,128,1,fp8,fp8,0,0.14226133624712625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,0,1,float16,fp8,0,0.1544319987297058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,0,1,fp8,fp8,0,0.1439466675122579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,128,1,float16,float16,0,0.09573333462079366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,0,1,float16,float16,0,0.09743466973304749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,128,1,float16,fp8,0,0.09377599755922954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,128,1,fp8,fp8,0,0.09512000282605489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,0,1,float16,fp8,0,0.09494933485984802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,0,1,fp8,fp8,0,0.09588799873987834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,128,1,float16,float16,0,0.08522133032480876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,0,1,float16,float16,0,0.08658666412035625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,128,1,float16,fp8,0,0.08648000160853068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,128,1,fp8,fp8,0,0.07727999985218048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,0,1,float16,fp8,0,0.08715732892354329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,0,1,fp8,fp8,0,0.07850666840871175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,128,1,float16,float16,0,0.08656000097592671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,0,1,float16,float16,0,0.08601066470146179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,128,1,fp8,fp8,0,0.07906666894753774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,0,1,float16,fp8,0,0.0867199997107188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,0,1,fp8,fp8,0,0.0788800021012624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,128,1,float16,float16,0,0.08517866333325703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,0,1,float16,float16,0,0.08629866441090901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,128,1,float16,fp8,0,0.08493333061536153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,128,1,fp8,fp8,0,0.07887466748555501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,0,1,float16,fp8,0,0.08603200316429138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,0,1,fp8,fp8,0,0.07991999884446462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,128,1,float16,float16,0,0.054234668612480164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,0,1,float16,float16,0,0.058320000767707825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,128,1,float16,fp8,0,0.05429866909980774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,128,1,fp8,fp8,0,0.05199466645717621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,0,1,float16,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,0,1,fp8,fp8,0,0.0537120004494985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,128,1,float16,float16,0,0.05194666484991709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,0,1,float16,float16,0,0.05402666827042898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,128,1,float16,fp8,0,0.08571199576059978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,128,1,fp8,fp8,0,0.04822400212287903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,0,1,float16,fp8,0,0.05415999889373779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,0,1,fp8,fp8,0,0.04828266799449921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,128,1,float16,float16,0,0.05203199883302053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,0,1,float16,float16,0,0.05366933345794678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,128,1,float16,fp8,0,0.05158400038878123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,128,1,fp8,fp8,0,0.04923733572165171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,0,1,float16,fp8,0,0.052069331208864846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,0,1,fp8,fp8,0,0.04972266654173533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,128,1,float16,float16,0,0.051957334081331887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,0,1,float16,float16,0,0.0524586687485377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,128,1,float16,fp8,0,0.05188799897829691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,128,1,fp8,fp8,0,0.049509331583976746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,0,1,float16,fp8,0,0.05390933156013489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,0,1,fp8,fp8,0,0.0496373325586319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,128,1,float16,float16,0,0.03711999952793121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,0,1,float16,float16,0,0.03752533346414566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,128,1,float16,fp8,0,0.05186666548252106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,128,1,fp8,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,0,1,float16,fp8,0,0.03589866558710734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,0,1,fp8,fp8,0,0.03754133234421412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,128,1,float16,float16,0,0.03522133330504099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,0,1,float16,float16,0,0.03760000069936117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,128,1,float16,fp8,0,0.03586666782697042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,128,1,fp8,fp8,0,0.03342399994532267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,0,1,fp8,fp8,0,0.26258132855097455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,0,1,fp8,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,128,1,float16,float16,0,0.03545066714286804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,128,1,float16,fp8,0,0.03598399957021078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,128,1,fp8,fp8,0,0.0337119996547699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,0,1,float16,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,128,1,float16,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,128,1,float16,float16,0,0.0354720006386439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,0,1,float16,float16,0,0.03551466763019562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,128,1,float16,fp8,0,0.03526933242877325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,128,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,0,1,fp8,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,0,1,fp8,fp8,0,0.033941333492596946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,128,1,float16,float16,0,1.98964262008667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,0,1,float16,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,0,1,float16,float16,0,1.959445317586263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,128,1,float16,fp8,0,1.9867466290791829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,0,1,float16,fp8,0,1.949402650197347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,0,1,fp8,fp8,0,1.7610400517781575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,128,1,float16,float16,0,1.996832052866618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,0,1,float16,float16,0,1.9525492986043294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,128,1,float16,fp8,0,1.9828373591105144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,128,1,fp8,fp8,0,1.9211039543151855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,0,1,float16,fp8,0,1.9457440376281738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,128,1,fp8,fp8,0,1.8232906659444172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,0,1,fp8,fp8,0,1.882522741953532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,128,1,float16,float16,0,1.9989760716756184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,0,1,float16,float16,0,1.9890079498291016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,128,1,float16,fp8,0,1.9846827189127605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,128,1,fp8,fp8,0,1.9369600613911946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,0,1,float16,fp8,0,1.9757653872172039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,0,1,float16,float16,0,1.1063626607259114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,0,1,fp8,fp8,0,1.8902452786763508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,128,1,float16,fp8,0,1.0825013319651287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,128,1,fp8,fp8,0,1.0674986839294434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,0,1,float16,fp8,0,1.0643786589304607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,0,1,fp8,fp8,0,1.0463626384735107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,128,1,float16,float16,0,1.0035786628723145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,0,1,float16,float16,0,0.9890986283620199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,128,1,float16,fp8,0,0.999567985534668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,128,1,fp8,fp8,0,0.8987147013346354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,0,1,float16,fp8,0,0.9882026513417562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,0,1,fp8,fp8,0,0.8786453406016032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,128,1,float16,float16,0,1.0021493434906006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,0,1,float16,float16,0,0.9850293000539144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,128,1,float16,float16,0,1.1163466771443684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,128,1,float16,fp8,0,0.9980426629384359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,128,1,fp8,fp8,0,0.9328213532765707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,0,1,float16,fp8,0,0.9822826385498047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,128,1,float16,float16,0,1.002794663111369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,0,1,float16,float16,0,0.9875466823577881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,128,1,float16,fp8,0,0.9997066656748453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,128,1,fp8,fp8,0,0.9285386403401693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,0,1,float16,fp8,0,0.9844533602396647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,128,1,float16,float16,0,0.5629226764043173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,0,1,float16,float16,0,0.551962653795878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,0,1,fp8,fp8,0,0.9095253149668375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,128,1,float16,fp8,0,0.5490560134251913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,128,1,fp8,fp8,0,0.544709324836731
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,0,1,float16,fp8,0,0.5393493175506592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,128,1,float16,float16,0,0.5134720007578532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,0,1,float16,float16,0,0.5022133191426595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,128,1,float16,fp8,0,0.5119839906692505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,128,1,fp8,fp8,0,0.45796799659729004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,0,1,float16,fp8,0,0.5017386674880981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,0,1,fp8,fp8,0,0.4617546796798706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,128,1,float16,float16,0,0.5108746687571207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,0,1,float16,float16,0,0.5028533140818278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,0,1,fp8,fp8,0,0.5326079924901327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,128,1,float16,fp8,0,0.5110719998677572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,128,1,fp8,fp8,0,0.46674664815266925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,0,1,float16,fp8,0,0.5019146601359049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,0,1,fp8,fp8,0,0.4594133297602336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,128,1,float16,float16,0,0.5137493213017782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,0,1,fp8,fp8,0,0.9041653474171957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,128,1,float16,fp8,0,0.5120480060577393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,128,1,fp8,fp8,0,0.47122132778167725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,0,1,float16,fp8,0,0.5027093489964803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,0,1,fp8,fp8,0,0.4620426495869954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,0,1,float16,float16,0,0.28836800654729206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,128,1,float16,fp8,0,0.28615466753641766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,128,1,fp8,fp8,0,0.2845653295516968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,0,1,float16,fp8,0,0.28138667345046997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,0,1,fp8,fp8,0,0.2781813343365987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,128,1,float16,float16,0,0.2651360034942627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,0,1,float16,float16,0,0.26129599412282306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,128,1,float16,fp8,0,0.26686400175094604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,128,1,fp8,fp8,0,0.24038932720820108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,0,1,float16,fp8,0,0.261952002843221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,0,1,fp8,fp8,0,0.23560533920923868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,128,1,float16,float16,0,0.2664480010668437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,0,1,float16,float16,0,0.26076799631118774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,128,1,float16,fp8,0,0.2675199906031291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,128,1,fp8,fp8,0,0.24468799432118735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,0,1,float16,fp8,0,0.262554665406545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,0,1,fp8,fp8,0,0.2388533353805542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,128,1,float16,float16,0,0.2672853271166484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,0,1,float16,float16,0,0.2619146704673767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,128,1,float16,fp8,0,0.2672746578852336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,128,1,fp8,fp8,0,0.24766933917999268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,0,1,float16,fp8,0,0.26269867022832233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,0,1,fp8,fp8,0,0.24011733134587607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,128,1,float16,float16,0,0.1569813291231791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,0,1,float16,float16,0,0.15445866187413534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,128,1,float16,fp8,0,0.1537866691748301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,128,1,fp8,fp8,0,0.1539253294467926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,0,1,float16,fp8,0,0.15064533551534018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,0,1,fp8,fp8,0,0.15053866306940714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,128,1,float16,float16,0,0.14174933234850565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,0,1,float16,float16,0,0.14035733540852866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,128,1,float16,fp8,0,0.14261333147684732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,128,1,fp8,fp8,0,0.13019733627637228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,0,1,float16,fp8,0,0.14189866185188293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,0,1,fp8,fp8,0,0.12607466181119284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,128,1,float16,float16,0,0.14299199978510538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,0,1,float16,float16,0,0.14010133345921835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,128,1,float16,fp8,0,0.1420799990495046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,128,1,fp8,fp8,0,0.13215999801953635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,0,1,float16,fp8,0,0.14056533575057983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,0,1,fp8,fp8,0,0.12826133767763773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,128,1,float16,float16,0,0.1434346636136373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,0,1,float16,float16,0,0.1402293344338735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,128,1,float16,fp8,0,0.14274666706720987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,128,1,fp8,fp8,0,0.13262933492660522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,0,1,float16,fp8,0,0.14164800445238748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,0,1,fp8,fp8,0,0.1316266655921936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,128,1,float16,float16,0,0.293232003847758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,128,1,float16,float16,0,0.09012266993522644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,0,1,float16,float16,0,0.08852266271909077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,128,1,float16,fp8,0,0.08864532907803853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,128,1,fp8,fp8,0,0.08985599875450134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,0,1,float16,fp8,0,0.0885653297106425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,0,1,fp8,fp8,0,0.08867733677228291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,128,1,float16,float16,0,0.08248533308506012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,0,1,float16,float16,0,0.08066666622956593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,128,1,float16,fp8,0,0.08243733147780101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,128,1,fp8,fp8,0,0.07457066575686137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,0,1,float16,fp8,0,0.0806826651096344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,0,1,fp8,fp8,0,0.07239999870459239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,128,1,float16,float16,0,0.08085333307584126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,0,1,float16,float16,0,0.07914666831493378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,128,1,float16,fp8,0,0.08169066905975342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,128,1,fp8,fp8,0,0.07412800192832947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,0,1,float16,fp8,0,0.07956266899903615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,0,1,fp8,fp8,0,0.07227199772993724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,128,1,float16,float16,0,0.08124800026416779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,0,1,float16,float16,0,0.08107199768225352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,128,1,float16,fp8,0,0.082096000512441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,0,1,float16,fp8,0,0.08077333370844524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,0,1,fp8,fp8,0,0.07487466434637706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,128,1,float16,float16,0,0.05036266644795736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,0,1,float16,float16,0,0.050373335679372154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,128,1,float16,fp8,0,0.050160000721613564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,128,1,fp8,fp8,0,0.04986133178075155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,0,1,float16,fp8,0,0.04924799998601278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,0,1,fp8,fp8,0,0.048010667165120445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,128,1,float16,float16,0,0.0476693312327067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,0,1,float16,float16,0,0.04748799900213877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,128,1,float16,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,128,1,fp8,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,0,1,float16,fp8,0,0.04655466477076212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,0,1,fp8,fp8,0,0.04461866617202759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,128,1,float16,float16,0,0.047983999053637184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,0,1,float16,float16,0,0.04791999856630961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,128,1,float16,fp8,0,0.0479360024134318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,128,1,fp8,fp8,0,0.0452106644709905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,0,1,float16,fp8,0,0.04651199777921041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,0,1,fp8,fp8,0,0.044064000248909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,128,1,float16,float16,0,0.04798933366934458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,0,1,float16,float16,0,0.047354668378829956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,128,1,float16,fp8,0,0.048714667558670044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,128,1,fp8,fp8,0,0.04614399870236715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,0,1,float16,float16,0,0.5029386679331461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,0,1,fp8,fp8,0,0.045893331368764244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,128,1,float16,float16,0,0.03349333256483078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,0,1,float16,float16,0,0.03481066723664602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,128,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,128,1,fp8,fp8,0,0.0341386670867602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,0,1,float16,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,0,1,fp8,fp8,0,0.03473600000143051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,128,1,float16,float16,0,0.033471999069054924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,0,1,float16,float16,0,0.032645332316557564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,128,1,float16,fp8,0,0.0337119996547699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,128,1,fp8,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,0,1,float16,fp8,0,0.03365866591533025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,0,1,fp8,fp8,0,0.030602666238943737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,128,1,float16,float16,0,0.03340800106525421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,0,1,float16,float16,0,0.03342399994532267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,128,1,float16,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,128,1,fp8,fp8,0,0.032111999889214836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,0,1,float16,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,0,1,fp8,fp8,0,0.031258667508761086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,128,1,float16,float16,0,0.033333333830038704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,0,1,float16,float16,0,0.03199466566244761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,128,1,float16,fp8,0,0.03349866718053818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,128,1,fp8,fp8,0,0.03251733382542928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,0,1,float16,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,0,1,fp8,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,128,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,0,1,float16,float16,0,0.025199999411900837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,128,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,128,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,0,1,float16,fp8,0,0.023584000766277313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,128,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,0,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,128,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,128,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,0,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,128,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,0,1,float16,float16,0,0.024869332710901897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,128,1,float16,fp8,0,0.0235359991590182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,0,1,float16,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,0,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,128,1,float16,float16,0,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,0,1,float16,float16,0,0.023311999936898548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,128,1,fp8,fp8,0,0.023658665517965954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,128,1,fp8,fp8,0,0.07572266459465027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,0,1,float16,fp8,0,0.04823466638724009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,0,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,128,1,float16,fp8,0,0.02430933217207591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,128,1,float16,float16,0,1.0594453016916912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,0,1,float16,float16,0,1.0593600273132324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,128,1,float16,fp8,0,1.0535306930541992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,128,1,fp8,fp8,0,0.957914670308431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,0,1,float16,fp8,0,1.0550026893615723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,128,1,float16,float16,0,1.0609227021535237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,0,1,float16,float16,0,1.0605706373850505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,128,1,float16,fp8,0,1.0629279613494873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,128,1,fp8,fp8,0,1.0239040056864421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,0,1,float16,fp8,0,1.0587519804636638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,0,1,fp8,fp8,0,0.9586133162180582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,128,1,float16,float16,0,1.0704906781514485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,0,1,float16,float16,0,1.0663093725840251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,128,1,float16,fp8,0,1.0636320114135742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,128,1,fp8,fp8,0,1.0197546482086182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,0,1,float16,fp8,0,1.0640373229980469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,0,1,fp8,fp8,0,1.0309279759724934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,128,1,float16,float16,0,0.5940746863683065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,0,1,float16,float16,0,0.593450665473938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,128,1,float16,fp8,0,0.5938773155212402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,128,1,fp8,fp8,0,0.5670346816380819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,0,1,float16,fp8,0,0.5815680027008057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,0,1,fp8,fp8,0,0.5678773323694865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,128,1,float16,float16,0,0.5373226801554362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,0,1,float16,float16,0,0.5387306610743204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,128,1,float16,fp8,0,0.535647988319397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,128,1,fp8,fp8,0,0.4851253430048625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,0,1,float16,fp8,0,0.5355466604232788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,0,1,fp8,fp8,0,1.0331146717071533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,128,1,float16,float16,0,0.5387306610743204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,0,1,float16,float16,0,0.5383466482162476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,128,1,float16,fp8,0,0.5372746785481771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,128,1,fp8,fp8,0,0.5008426507314047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,0,1,float16,fp8,0,0.5379519859949747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,0,1,fp8,fp8,0,0.501749316851298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,128,1,float16,float16,0,0.5417706569035848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,0,1,float16,float16,0,0.5418293476104736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,128,1,float16,fp8,0,0.5401813189188639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,128,1,fp8,fp8,0,0.5037493308385214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,0,1,fp8,fp8,0,0.4865066607793172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,0,1,fp8,fp8,0,0.5031893253326416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,128,1,float16,float16,0,0.3069760004679362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,0,1,float16,float16,0,0.3056959907213847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,128,1,float16,fp8,0,0.2991573413213094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,128,1,fp8,fp8,0,0.2943999965985616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,0,1,float16,fp8,0,0.2988746762275696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,0,1,fp8,fp8,0,0.29312000672022503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,128,1,float16,float16,0,0.2776906689008077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,0,1,float16,float16,0,0.2770559986432393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,128,1,float16,fp8,0,0.27940799792607623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,128,1,fp8,fp8,0,0.25296000639597577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,0,1,float16,fp8,0,0.27716267108917236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,0,1,fp8,fp8,0,0.2542613347371419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,128,1,float16,float16,0,0.27923200527826947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,0,1,float16,float16,0,0.2779039939244588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,128,1,float16,fp8,0,0.27775466442108154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,0,1,float16,fp8,0,0.5394186576207479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,0,1,float16,fp8,0,0.27711466948191327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,0,1,fp8,fp8,0,0.25969600677490234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,128,1,float16,float16,0,0.28091200192769367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,0,1,float16,float16,0,0.279530664285024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,128,1,float16,fp8,0,0.2789439956347148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,128,1,fp8,fp8,0,0.2604586680730184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,0,1,float16,fp8,0,0.27803200483322144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,0,1,fp8,fp8,0,0.2607626716295878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,0,1,float16,float16,0,0.16061333815256754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,128,1,float16,fp8,0,0.1590986649195353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,128,1,fp8,fp8,0,0.15793599685033163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,0,1,float16,fp8,0,0.15901333093643188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,0,1,fp8,fp8,0,0.15786133209864298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,128,1,float16,float16,0,0.14646933476130167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,0,1,float16,float16,0,0.1463520030180613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,128,1,float16,fp8,0,0.14680000146230063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,128,1,fp8,fp8,0,0.1361066699028015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,0,1,float16,fp8,0,0.14735999703407288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,0,1,fp8,fp8,0,0.1355946660041809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,128,1,float16,float16,0,0.14692800243695578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,0,1,float16,float16,0,0.14794133106867471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,128,1,float16,fp8,0,0.1483519971370697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,128,1,fp8,fp8,0,0.1372266709804535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,0,1,float16,fp8,0,0.14820266763369241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,0,1,fp8,fp8,0,0.13637333114941916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,128,1,float16,float16,0,0.14854933818181357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,0,1,float16,float16,0,0.1472053329149882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,128,1,float16,fp8,0,0.14890133341153464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,128,1,fp8,fp8,0,0.13917332887649536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,0,1,float16,fp8,0,0.1472640037536621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,0,1,fp8,fp8,0,0.13876799742380777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,128,1,fp8,fp8,0,0.2572159965833028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,0,1,float16,float16,0,0.09066133697827657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,128,1,float16,fp8,0,0.08939733107884724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,128,1,fp8,fp8,0,0.09220266342163086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,0,1,float16,fp8,0,0.08912000060081482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,0,1,fp8,fp8,0,0.09301867087682088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,128,1,float16,float16,0,0.08232533435026805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,0,1,float16,float16,0,0.08224533498287201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,128,1,float16,float16,0,0.160261332988739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,128,1,fp8,fp8,0,0.07693866888682048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,0,1,float16,fp8,0,0.08293333152929942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,0,1,fp8,fp8,0,0.076773335536321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,128,1,float16,float16,0,0.0831573357184728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,128,1,float16,float16,0,0.09087999661763509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,0,1,float16,float16,0,0.08204799890518188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,128,1,float16,fp8,0,0.08271466692288716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,128,1,fp8,fp8,0,0.07623466849327087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,0,1,float16,fp8,0,0.08297599852085114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,0,1,fp8,fp8,0,0.07648533085982005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,128,1,float16,float16,0,0.0839413305123647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,0,1,float16,float16,0,0.08266133566697438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,128,1,float16,fp8,0,0.08321066697438557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,128,1,fp8,fp8,0,0.07763200004895528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,0,1,float16,fp8,0,0.08338666955629985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,128,1,float16,float16,0,0.052383999029795326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,0,1,float16,float16,0,0.05189333359400431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,128,1,float16,fp8,0,0.05215999980767568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,128,1,fp8,fp8,0,0.05087999999523163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,0,1,float16,fp8,0,0.05143466591835022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,0,1,fp8,fp8,0,0.0499839981396993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,128,1,float16,float16,0,0.050341332952181496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,128,1,float16,fp8,0,0.08416000008583069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,128,1,float16,fp8,0,0.05013866722583771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,128,1,fp8,fp8,0,0.04933866858482361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,0,1,fp8,fp8,0,0.047775998711586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,128,1,float16,float16,0,0.05003199974695841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,0,1,float16,float16,0,0.04993600149949392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,128,1,float16,fp8,0,0.05012799799442291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,128,1,fp8,fp8,0,0.04615999758243561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,0,1,float16,fp8,0,0.049728001157442726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,0,1,fp8,fp8,0,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,0,1,float16,float16,0,0.0499893327554067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,128,1,float16,float16,0,0.05002133548259735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,0,1,float16,float16,0,0.049914668003718056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,128,1,float16,fp8,0,0.050000001986821495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,128,1,fp8,fp8,0,0.0462773342927297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,0,1,float16,fp8,0,0.04975466430187225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,0,1,fp8,fp8,0,0.047797332207361855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,128,1,float16,float16,0,0.03158933420976003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,0,1,float16,float16,0,0.03332266708215078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,128,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,128,1,fp8,fp8,0,0.03229333211978277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,0,1,float16,fp8,0,0.03329066683848699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,0,1,fp8,fp8,0,0.032474666833877563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,128,1,float16,float16,0,0.03190399954716364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,0,1,float16,float16,0,0.03136533250411352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,128,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,128,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,0,1,float16,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,0,1,fp8,fp8,0,0.029658667743206024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,128,1,float16,float16,0,0.031184000273545582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,0,1,float16,float16,0,0.03148799886306127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,128,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,128,1,fp8,fp8,0,0.0295413335164388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,0,1,float16,fp8,0,0.031258667508761086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,0,1,fp8,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,128,1,float16,float16,0,0.03156266609827677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,0,1,float16,float16,0,0.03169599920511246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,128,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,128,1,fp8,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,0,1,float16,fp8,0,0.03181333343187968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,0,1,fp8,fp8,0,0.031082667410373688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,128,1,float16,float16,0,0.02550400048494339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,0,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,128,1,float16,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,128,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,0,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,128,1,float16,float16,0,0.02502399931351344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,0,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,128,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,128,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,0,1,float16,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,0,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,0,1,float16,fp8,0,0.04975999891757965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,0,1,float16,float16,0,0.02521066615978877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,128,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,128,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,0,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,128,1,float16,float16,0,0.025013332565625507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,0,1,float16,float16,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,128,1,float16,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,128,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,0,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,0,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,128,1,float16,float16,0,0.01863466699918111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,128,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,0,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,128,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,128,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,0,1,fp8,fp8,0,0.017802666872739792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,128,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,0,1,float16,float16,0,0.017637333522240322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,128,1,float16,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,0,1,fp8,fp8,0,0.07786666850248973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,0,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,128,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,0,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,128,1,float16,float16,0,0.7583200136820475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,0,1,float16,float16,0,0.7604373296101888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,128,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,128,1,float16,fp8,0,0.7580107053120931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,128,1,fp8,fp8,0,0.6674346923828125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,0,1,fp8,fp8,0,0.6704586346944174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,0,1,float16,fp8,0,0.7583786646525065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,0,1,float16,float16,0,0.7584426403045654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,128,1,float16,fp8,0,0.75654403368632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,128,1,fp8,fp8,0,0.68012801806132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,0,1,float16,fp8,0,0.7564160029093424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,0,1,fp8,fp8,0,0.6807893117268881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,128,1,float16,float16,0,0.7596960067749023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,128,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,128,1,float16,fp8,0,0.7593173185984293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,128,1,fp8,fp8,0,0.6831626892089844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,0,1,float16,fp8,0,0.7589600086212158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,0,1,fp8,fp8,0,0.6853493054707845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,0,1,float16,float16,0,0.41413867473602295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,0,1,float16,float16,0,0.7627893288930258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,128,1,float16,fp8,0,0.40793601671854657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,128,1,fp8,fp8,0,0.38464001814524335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,0,1,float16,fp8,0,0.4170773426691691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,0,1,fp8,fp8,0,0.3853706518809001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,128,1,float16,float16,0,0.38663466771443683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,128,1,float16,float16,0,0.7579360008239746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,128,1,float16,fp8,0,0.3877439896265666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,128,1,fp8,fp8,0,0.34323732058207196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,0,1,float16,fp8,0,0.38761067390441895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,0,1,fp8,fp8,0,0.3425813515981038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,128,1,float16,float16,0,0.3873279889424642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,0,1,float16,float16,0,0.38764798641204834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,128,1,float16,fp8,0,0.3860479990641276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,128,1,fp8,fp8,0,0.3476693232854207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,0,1,float16,float16,0,0.3885600169499715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,0,1,fp8,fp8,0,0.3464053471883138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,128,1,float16,float16,0,0.3877333402633667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,0,1,float16,float16,0,0.3880266745885213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,128,1,float16,fp8,0,0.3880213499069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,128,1,fp8,fp8,0,0.3494453430175781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,0,1,float16,fp8,0,0.38663466771443683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,0,1,fp8,fp8,0,0.34784531593322754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,128,1,float16,float16,0,0.21411200364430746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,0,1,float16,float16,0,0.21426665782928467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,128,1,float16,fp8,0,0.2129439910252889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,128,1,fp8,fp8,0,0.20249066750208536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,0,1,float16,fp8,0,0.21372799078623453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,0,1,fp8,fp8,0,0.2011893391609192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,128,1,float16,float16,0,0.20038400093714395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,0,1,float16,float16,0,0.20124799013137817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,128,1,float16,fp8,0,0.20034132401148477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,128,1,fp8,fp8,0,0.17999466260274252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,0,1,float16,fp8,0,0.20219733317693075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,0,1,fp8,fp8,0,0.18023999532063803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,128,1,float16,float16,0,0.20216000080108643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,0,1,float16,float16,0,0.2015626629193624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,128,1,float16,fp8,0,0.2014240026473999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,128,1,float16,float16,0,0.41385066509246826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,0,1,float16,fp8,0,0.20215467611948648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,0,1,fp8,fp8,0,0.1809920072555542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,128,1,float16,float16,0,0.20132799943288168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,0,1,float16,float16,0,0.20202134052912393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,128,1,float16,fp8,0,0.2009920080502828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,128,1,fp8,fp8,0,0.18197866280873617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,0,1,float16,fp8,0,0.2015999952952067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,0,1,fp8,fp8,0,0.18194133043289185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,128,1,float16,float16,0,0.11699199676513672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,0,1,float16,float16,0,0.1160586675008138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,128,1,float16,fp8,0,0.11487467090288798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,128,1,fp8,fp8,0,0.11326400438944499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,0,1,float16,fp8,0,0.11479999621709187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,0,1,float16,fp8,0,0.38605864842732746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,128,1,float16,float16,0,0.10771200060844421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,0,1,float16,float16,0,0.10848533113797505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,128,1,float16,fp8,0,0.10773332913716634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,128,1,fp8,fp8,0,0.0990773340066274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,0,1,float16,fp8,0,0.10973866780598958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,0,1,fp8,fp8,0,0.0993386705716451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,128,1,float16,float16,0,0.10942932963371277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,0,1,float16,float16,0,0.10934933026631673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,128,1,float16,fp8,0,0.1074720025062561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,128,1,fp8,fp8,0,0.09928533434867859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,0,1,float16,fp8,0,0.10776000221570332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,0,1,fp8,fp8,0,0.09777599573135376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,128,1,float16,float16,0,0.10929066936175029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,0,1,float16,float16,0,0.1088053286075592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,128,1,fp8,fp8,0,0.1811199982961019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,0,1,fp8,fp8,0,0.11346667011578877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,0,1,float16,fp8,0,0.10816533366839091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,0,1,fp8,fp8,0,0.09814400474230449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,128,1,float16,float16,0,0.06433600187301636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,0,1,float16,float16,0,0.06406933565934499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,128,1,float16,fp8,0,0.06423466900984447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,128,1,fp8,fp8,0,0.06241066753864288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,0,1,float16,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,0,1,fp8,fp8,0,0.062181333700815834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,128,1,float16,fp8,0,0.10959466298421223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,128,1,fp8,fp8,0,0.09883200128873189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,0,1,float16,float16,0,0.06252266466617584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,128,1,float16,fp8,0,0.06227200229962667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,128,1,fp8,fp8,0,0.058117335041364036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,0,1,float16,fp8,0,0.0627040018637975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,0,1,fp8,fp8,0,0.05598933498064677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,128,1,float16,float16,0,0.0625546673933665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,0,1,float16,float16,0,0.0618453323841095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,128,1,float16,fp8,0,0.06292800108591716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,128,1,fp8,fp8,0,0.05645333230495453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,0,1,float16,fp8,0,0.06239999830722809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,0,1,fp8,fp8,0,0.05633600056171417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,128,1,float16,float16,0,0.06218666831652323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,0,1,float16,float16,0,0.06239999830722809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,128,1,float16,fp8,0,0.06217599908510844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,128,1,fp8,fp8,0,0.058448001742362976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,0,1,float16,fp8,0,0.062496001521746315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,128,1,float16,float16,0,0.06195199986298879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,128,1,float16,float16,0,0.04035199930270513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,0,1,float16,float16,0,0.040720000863075256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,128,1,fp8,fp8,0,0.03769599894682566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,0,1,float16,fp8,0,0.040181333820025124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,128,1,float16,float16,0,0.03826133410135905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,0,1,float16,float16,0,0.04062933226426443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,128,1,float16,fp8,0,0.03997333347797394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,128,1,fp8,fp8,0,0.03827733298142751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,0,1,float16,fp8,0,0.0400693342089653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,0,1,fp8,fp8,0,0.036015999813874565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,0,1,fp8,fp8,0,0.058133333921432495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,128,1,float16,float16,0,0.03844800094763438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,0,1,float16,float16,0,0.03946666667858759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,128,1,float16,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,0,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,0,1,float16,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,0,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,128,1,float16,float16,0,0.04002666721741358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,0,1,float16,float16,0,0.03749333322048187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,128,1,float16,fp8,0,0.03980266551176707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,0,1,float16,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,0,1,fp8,fp8,0,0.0379573330283165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,128,1,float16,float16,0,0.02608533451954524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,0,1,float16,float16,0,0.02606933315594991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,128,1,float16,fp8,0,0.0396373321612676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,128,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,128,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,128,1,fp8,fp8,0,0.0354720006386439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,0,1,fp8,fp8,0,0.027621333797772724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,128,1,float16,float16,0,0.02569599946339925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,0,1,float16,float16,0,0.025466665625572205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,128,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,128,1,fp8,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,0,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,0,1,fp8,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,128,1,float16,float16,0,0.025557334224383037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,128,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,128,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,0,1,float16,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,0,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,0,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,0,1,float16,float16,0,0.025957333544890087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,128,1,float16,fp8,0,0.025701334079106648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,128,1,fp8,fp8,0,0.03755199909210205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,0,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,0,1,fp8,fp8,0,0.026629333694775898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,128,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,0,1,float16,float16,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,128,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,0,1,float16,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,128,1,float16,float16,0,0.020096000283956528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,0,1,float16,float16,0,0.0205226664741834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,128,1,float16,fp8,0,0.020725333442290623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,0,1,fp8,fp8,0,0.019727999965349834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,128,1,float16,float16,0,0.02075200031201045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,0,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,128,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,128,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,0,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,0,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,128,1,float16,float16,0,0.02073066681623459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,128,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,128,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,0,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,128,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,128,1,float16,fp8,0,0.017781333376963932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,0,1,float16,fp8,0,0.018133333573738735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,128,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,0,1,float16,float16,0,0.018085333208243053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,128,1,fp8,fp8,0,0.017765333255132038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,0,1,float16,fp8,0,0.017840000490347546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,0,1,fp8,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,128,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,128,1,float16,float16,0,0.604037324587504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,0,1,float16,float16,0,0.603488008181254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,128,1,float16,fp8,0,0.6040586630503336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,128,1,fp8,fp8,0,0.5298293431599935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,0,1,float16,fp8,0,0.6026080052057902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,0,1,fp8,fp8,0,0.5298773447672526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,128,1,float16,float16,0,0.6033120155334473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,0,1,float16,float16,0,0.6018933455149332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,128,1,float16,fp8,0,0.6025866667429606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,128,1,fp8,fp8,0,0.5355679988861084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,0,1,float16,fp8,0,0.602399984995524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,0,1,fp8,fp8,0,0.5363200108210245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,128,1,float16,float16,0,0.6041173140207926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,0,1,float16,float16,0,0.6048800150553385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,128,1,float16,fp8,0,0.6031893491744995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,128,1,fp8,fp8,0,0.5377333164215088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,0,1,float16,fp8,0,0.6032053232192993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,0,1,fp8,fp8,0,0.5347679853439331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,0,1,float16,float16,0,0.3221279978752136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,128,1,fp8,fp8,0,0.2944800059000651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,0,1,float16,fp8,0,0.32025599479675293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,0,1,fp8,fp8,0,0.29384533564249676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,128,1,float16,float16,0,0.30829334259033203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,0,1,float16,float16,0,0.3096853295962016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,128,1,float16,fp8,0,0.3084213336308797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,128,1,fp8,fp8,0,0.2733173370361328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,0,1,float16,fp8,0,0.3086400032043457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,0,1,fp8,fp8,0,0.27291733026504517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,128,1,float16,fp8,0,0.3199946681658427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,0,1,float16,float16,0,0.308624009291331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,128,1,float16,fp8,0,0.30776532491048175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,128,1,fp8,fp8,0,0.27353066205978394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,0,1,float16,fp8,0,0.30803199609120685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,0,1,fp8,fp8,0,0.2751839955647786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,128,1,float16,float16,0,0.3086400032043457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,0,1,float16,float16,0,0.3099626700083415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,128,1,float16,fp8,0,0.3081493377685547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,128,1,fp8,fp8,0,0.2757226626078288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,128,1,float16,float16,0,0.30905065933863324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,0,1,fp8,fp8,0,0.27566399176915485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,128,1,float16,float16,0,0.16953599452972412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,0,1,float16,float16,0,0.1688800056775411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,128,1,float16,fp8,0,0.1690453290939331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,128,1,fp8,fp8,0,0.1586186687151591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,0,1,float16,fp8,0,0.1674506664276123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,0,1,fp8,fp8,0,0.15872533122698465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,128,1,float16,float16,0,0.1609760026137034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,0,1,float16,float16,0,0.16054933269818625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,128,1,float16,fp8,0,0.1625386675198873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,128,1,fp8,fp8,0,0.14450666308403015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,128,1,float16,float16,0,0.3208799958229065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,0,1,fp8,fp8,0,0.1444000005722046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,0,1,float16,fp8,0,0.3098026712735494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,0,1,float16,float16,0,0.16200533509254456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,128,1,float16,fp8,0,0.16290666659673056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,128,1,fp8,fp8,0,0.14446399609247842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,0,1,float16,fp8,0,0.16269866625467935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,0,1,fp8,fp8,0,0.14427733421325684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,128,1,float16,float16,0,0.1630773345629374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,0,1,float16,float16,0,0.1612266699473063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,128,1,float16,fp8,0,0.16168000300725302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,128,1,fp8,fp8,0,0.14461333552996317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,0,1,float16,fp8,0,0.1613759994506836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,0,1,fp8,fp8,0,0.14592533310254416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,0,1,float16,float16,0,0.08894399801890056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,128,1,float16,fp8,0,0.08937066793441772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,128,1,fp8,fp8,0,0.08308266599973042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,0,1,float16,fp8,0,0.08973866701126099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,0,1,fp8,fp8,0,0.08515733480453491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,128,1,float16,float16,0,0.08898666501045227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,0,1,float16,float16,0,0.08910933136940002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,128,1,float16,fp8,0,0.08921600381533305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,128,1,fp8,fp8,0,0.08037866652011871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,0,1,float16,fp8,0,0.08698667089144389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,0,1,fp8,fp8,0,0.07921599845091502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,128,1,float16,float16,0,0.08720533053080241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,128,1,float16,float16,0,0.09091732899347942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,128,1,float16,float16,0,0.16106133659680685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,128,1,float16,fp8,0,0.08708799878756206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,128,1,fp8,fp8,0,0.08080533146858215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,0,1,float16,fp8,0,0.08877866466840108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,0,1,fp8,fp8,0,0.08112533390522003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,128,1,float16,float16,0,0.08906666437784831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,0,1,float16,float16,0,0.08876267075538635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,0,1,float16,fp8,0,0.16293866435686746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,128,1,float16,fp8,0,0.08760000268618266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,0,1,float16,float16,0,0.08912533521652222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,0,1,float16,fp8,0,0.08735466996828715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,0,1,fp8,fp8,0,0.08072533210118611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,0,1,float16,float16,0,0.052144000927607216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,128,1,float16,fp8,0,0.05198400219281515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,128,1,fp8,fp8,0,0.050053333242734276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,0,1,float16,fp8,0,0.053786665201187134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,128,1,float16,float16,0,0.05212800204753876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,0,1,float16,float16,0,0.05218133330345154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,128,1,float16,fp8,0,0.052101333936055504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,128,1,fp8,fp8,0,0.08107733229796092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,128,1,fp8,fp8,0,0.04849066833655039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,0,1,float16,fp8,0,0.05221866567929586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,0,1,fp8,fp8,0,0.04830400149027506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,128,1,float16,float16,0,0.050714666644732155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,0,1,float16,float16,0,0.05223466455936432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,128,1,float16,fp8,0,0.05147733290990194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,128,1,fp8,fp8,0,0.04826666911443075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,0,1,float16,fp8,0,0.05208533505598704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,0,1,fp8,fp8,0,0.04773866633574168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,128,1,float16,float16,0,0.05050666630268097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,0,1,float16,float16,0,0.05182399849096934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,128,1,float16,fp8,0,0.05199466645717621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,0,1,fp8,fp8,0,0.05049600203831991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,0,1,float16,fp8,0,0.05216533442338308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,0,1,fp8,fp8,0,0.04900800188382467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,0,1,float16,float16,0,0.033941333492596946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,128,1,float16,float16,0,0.05207466582457224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,128,1,fp8,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,0,1,float16,fp8,0,0.03379733363787333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,0,1,fp8,fp8,0,0.03179199993610382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,128,1,float16,float16,0,0.03327466547489166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,0,1,float16,float16,0,0.03333866596221924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,128,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,128,1,float16,float16,0,0.03498666733503342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,128,1,fp8,fp8,0,0.031701333820819855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,0,1,float16,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,0,1,fp8,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,128,1,float16,float16,0,0.033674667278925575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,0,1,float16,float16,0,0.03374933451414108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,128,1,float16,fp8,0,0.033530667424201965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,128,1,fp8,fp8,0,0.03179199993610382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,0,1,float16,fp8,0,0.033770665526390076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,0,1,fp8,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,128,1,float16,float16,0,0.03379733363787333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,0,1,float16,float16,0,0.03350399931271871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,128,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,128,1,fp8,fp8,0,0.03183466692765554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,0,1,float16,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,0,1,fp8,fp8,0,0.03155199935038885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,128,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,0,1,float16,float16,0,0.02351466566324234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,128,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,128,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,0,1,float16,fp8,0,0.023589332898457844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,0,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,128,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,0,1,float16,float16,0,0.023168000082174938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,128,1,float16,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,128,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,0,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,128,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,128,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,128,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,0,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,0,1,fp8,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,128,1,float16,float16,0,0.022954667607943218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,0,1,float16,float16,0,0.02327466756105423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,128,1,float16,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,128,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,0,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,128,1,float16,fp8,0,0.03425599883000056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,128,1,float16,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,128,1,fp8,fp8,0,0.020090666910012562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,0,1,float16,fp8,0,0.02182399978240331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,0,1,fp8,fp8,0,0.020128000527620316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,128,1,fp8,fp8,0,0.04816000163555145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,128,1,float16,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,0,1,float16,float16,0,0.019941333681344986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,128,1,float16,fp8,0,0.0207893339296182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,128,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,128,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,128,1,float16,float16,0,0.01985599969824155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,128,1,float16,fp8,0,0.020687999824682873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,128,1,fp8,fp8,0,0.01960533360640208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,128,1,float16,float16,0,0.019717333217461903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,0,1,float16,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,128,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,128,1,float16,fp8,0,0.01634666696190834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,0,1,float16,fp8,0,0.015647999942302704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,128,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,128,1,float16,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,128,1,float16,float16,0,0.5227466821670532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,0,1,float16,float16,0,0.521941343943278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,128,1,float16,fp8,0,0.5220213333765665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,128,1,fp8,fp8,0,0.46288001537323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,0,1,float16,fp8,0,0.5215839942296346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,0,1,fp8,fp8,0,0.464789350827535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,128,1,float16,float16,0,0.5223893324534098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,128,1,fp8,fp8,0,0.4666666587193807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,0,1,float16,fp8,0,0.5210239887237549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,0,1,fp8,fp8,0,0.466378649075826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,128,1,float16,float16,0,0.5226986805597941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,0,1,float16,float16,0,0.5222506523132324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,128,1,float16,fp8,0,0.5213546752929688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,0,1,float16,float16,0,0.5215466817220052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,128,1,float16,fp8,0,0.5219306548436483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,0,1,float16,fp8,0,0.5214133262634277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,128,1,float16,float16,0,0.276037335395813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,0,1,float16,float16,0,0.2752799987792969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,128,1,float16,fp8,0,0.2738506595293681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,128,1,fp8,fp8,0,0.25485867261886597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,0,1,float16,fp8,0,0.2749386628468831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,0,1,fp8,fp8,0,0.2541866699854533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,128,1,float16,float16,0,0.2674400011698405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,0,1,float16,float16,0,0.26741333802541095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,128,1,float16,fp8,0,0.2674293319384257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,128,1,fp8,fp8,0,0.465770681699117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,128,1,fp8,fp8,0,0.23928000529607138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,0,1,float16,fp8,0,0.2674773335456848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,0,1,fp8,fp8,0,0.24084800481796265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,128,1,float16,float16,0,0.26825066407521564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,0,1,float16,float16,0,0.2683680057525635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,128,1,float16,fp8,0,0.26799466212590534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,128,1,fp8,fp8,0,0.24047466119130453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,0,1,float16,fp8,0,0.2687893311182658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,0,1,fp8,fp8,0,0.2400160034497579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,128,1,float16,float16,0,0.2678080002466838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,0,1,float16,float16,0,0.2672213315963745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,128,1,float16,fp8,0,0.26893866062164307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,0,1,float16,fp8,0,0.26737600564956665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,0,1,fp8,fp8,0,0.23988266785939535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,128,1,float16,float16,0,0.14206399520238241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,0,1,float16,float16,0,0.14029332995414734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,128,1,float16,fp8,0,0.14249599973360697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,0,1,fp8,fp8,0,0.4659200112024943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,0,1,float16,fp8,0,0.14245333274205527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,0,1,fp8,fp8,0,0.1320266624291738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,128,1,float16,float16,0,0.14034666617711386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,0,1,float16,float16,0,0.13949333628018698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,128,1,float16,fp8,0,0.1402720014254252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,128,1,fp8,fp8,0,0.12802132964134216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,0,1,float16,fp8,0,0.1400106648604075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,0,1,fp8,fp8,0,0.12822932998339334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,128,1,fp8,fp8,0,0.23896000782648721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,0,1,float16,float16,0,0.1399679978688558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,128,1,float16,fp8,0,0.1397706667582194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,128,1,fp8,fp8,0,0.12813867131868997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,0,1,float16,fp8,0,0.14011733730634054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,0,1,fp8,fp8,0,0.12760532895723978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,128,1,float16,float16,0,0.14015466968218485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,0,1,float16,float16,0,0.1393013298511505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,128,1,float16,fp8,0,0.13974933822949728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,128,1,fp8,fp8,0,0.12798399726549783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,0,1,float16,fp8,0,0.13966932892799377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,0,1,fp8,fp8,0,0.12822932998339334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,128,1,float16,float16,0,0.07841066519419353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,0,1,float16,float16,0,0.07841599980990092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,128,1,float16,fp8,0,0.07841599980990092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,128,1,float16,float16,0,0.1402666668097178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,0,1,float16,fp8,0,0.07863466441631317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,0,1,fp8,fp8,0,0.0745600014925003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,128,1,float16,float16,0,0.07853333155314128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,128,1,fp8,fp8,0,0.13211733102798462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,0,1,float16,float16,0,0.07865599791208903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,128,1,float16,fp8,0,0.07884799937407176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,128,1,fp8,fp8,0,0.07226133346557617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,0,1,float16,fp8,0,0.07679466903209686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,0,1,fp8,fp8,0,0.0711413323879242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,128,1,float16,float16,0,0.07690133154392242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,0,1,float16,float16,0,0.07706133524576823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,128,1,float16,fp8,0,0.07665599882602692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,128,1,fp8,fp8,0,0.07235733171304067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,128,1,fp8,fp8,0,0.07434133191903432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,0,1,fp8,fp8,0,0.0724480003118515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,128,1,float16,float16,0,0.07791466514269511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,0,1,float16,float16,0,0.07702399790287018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,128,1,float16,fp8,0,0.07842666904131572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,128,1,fp8,fp8,0,0.07172266642252605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,0,1,float16,fp8,0,0.0777706652879715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,0,1,fp8,fp8,0,0.07252266506354015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,128,1,float16,float16,0,0.04603200157483419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,0,1,float16,float16,0,0.046154667933781944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,128,1,float16,fp8,0,0.04593066871166229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,128,1,fp8,fp8,0,0.0458186666170756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,0,1,float16,fp8,0,0.04610666632652283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,0,1,fp8,fp8,0,0.04574933151404063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,128,1,float16,float16,0,0.04572266836961111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,0,1,float16,float16,0,0.047872001926104225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,128,1,fp8,fp8,0,0.04397333165009817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,0,1,float16,fp8,0,0.04786666731039683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,0,1,fp8,fp8,0,0.04362666606903076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,128,1,float16,float16,0,0.0462666650613149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,0,1,float16,float16,0,0.04561600089073181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,128,1,float16,fp8,0,0.04605866471926371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,128,1,fp8,fp8,0,0.043568000197410583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,0,1,float16,fp8,0,0.04609066744645437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,0,1,fp8,fp8,0,0.04388799766699473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,128,1,float16,float16,0,0.04589866598447164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,0,1,float16,float16,0,0.04629333317279816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,128,1,float16,fp8,0,0.0476693312327067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,128,1,fp8,fp8,0,0.043562665581703186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,0,1,float16,fp8,0,0.04762133459250132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,0,1,fp8,fp8,0,0.04394133388996124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,128,1,float16,float16,0,0.030159999926884968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,0,1,float16,float16,0,0.029301332930723827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,128,1,float16,fp8,0,0.03155199935038885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,128,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,0,1,float16,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,0,1,float16,fp8,0,0.07669333120187123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,128,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,0,1,float16,float16,0,0.030405332644780476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,128,1,float16,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,128,1,fp8,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,0,1,float16,fp8,0,0.031290667752424874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,0,1,fp8,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,128,1,float16,float16,0,0.029450667401154835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,0,1,float16,float16,0,0.031317333380381264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,128,1,float16,fp8,0,0.030053332448005676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,128,1,fp8,fp8,0,0.029733332494894665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,0,1,float16,fp8,0,0.030591999491055805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,0,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,128,1,float16,float16,0,0.029648000995318096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,0,1,float16,float16,0,0.029616000751654308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,128,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,128,1,fp8,fp8,0,0.027642667293548584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,0,1,float16,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,0,1,fp8,fp8,0,0.02961066613594691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,128,1,float16,float16,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,128,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,128,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,0,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,128,1,float16,float16,0,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,0,1,float16,float16,0,0.022783999641736347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,128,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,0,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,0,1,fp8,fp8,0,0.021546666820844013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,128,1,float16,float16,0,0.021295999487241108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,128,1,float16,fp8,0,0.022895999252796173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,0,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,0,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,128,1,float16,fp8,0,0.04645333190759023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,128,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,0,1,float16,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,0,1,fp8,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,128,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,0,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,128,1,float16,float16,0,0.019541333119074505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,128,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,128,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,0,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,0,1,fp8,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,0,1,float16,float16,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,128,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,128,1,float16,float16,0,0.018853332847356796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,0,1,float16,float16,0,0.019493332753578823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,128,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,0,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,128,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,128,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,0,1,float16,float16,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,128,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,128,1,fp8,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,0,1,fp8,fp8,0,0.015882667154073715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,0,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,128,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,0,0.4416106541951497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,0,0.4414079984029134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,0,0.44113067785898846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,128,1,fp8,fp8,0,0.40008533000946045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,0,0.44146132469177246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,0,0.4412800073623657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,0,0.4415839910507202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,128,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,0,0.441978653271993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,128,1,fp8,fp8,0,0.3989280064900716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,0,0.44130667050679523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,64,0,1,fp8,fp8,0,0.40024534861246747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,0,0.4415573279062907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,0,0.4426186482111613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,0,0.4425813357035319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,128,1,fp8,fp8,0,0.39866665999094647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,0,0.4418826500574748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,64,0,1,fp8,fp8,0,0.4002879858016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,0,1,float16,float16,0,0.228277325630188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,128,1,float16,fp8,0,0.22644267479578653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,128,1,fp8,fp8,0,0.20774400234222412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,0,1,float16,fp8,0,0.22631466388702393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,0,1,fp8,fp8,0,0.20601600408554077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,0,0.22649067640304565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,0,0.2262399991353353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,64,0,1,fp8,fp8,0,0.39977598190307617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,128,1,fp8,fp8,0,0.20616533358891806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,0,0.22613867123921713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,64,128,1,float16,float16,0,0.22831465800603232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,0,0.22625599304835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,0,0.22843732436498007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,0,0.2266133427619934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,128,1,fp8,fp8,0,0.2068906625111898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,0,0.22634132703145346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,64,0,1,fp8,fp8,0,0.20692267020543417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,0,0.22592532634735107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,0,0.22709866364796957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,0,0.22643200556437174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,128,1,fp8,fp8,0,0.2058560053507487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,0,0.22627200682957968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,0,1,fp8,fp8,0,0.20590933163960776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,64,0,1,fp8,fp8,0,0.20593067010243735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,128,1,float16,float16,0,0.12159466743469238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,0,1,float16,float16,0,0.12195733189582825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,128,1,float16,fp8,0,0.12155733505884807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,128,1,fp8,fp8,0,0.11120532949765523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,0,1,float16,fp8,0,0.12206400434176128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,64,0,1,fp8,fp8,0,0.11146666606267293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,0,0.12185066938400269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,0,0.1216319998105367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,0,0.12179199854532878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,128,1,fp8,fp8,0,0.11121599872907002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,0,0.12213333447774251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,64,0,1,fp8,fp8,0,0.10988799730936687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,0,0.12184533476829529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,0,0.12152000268300374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,0,0.12133333086967468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,128,1,fp8,fp8,0,0.10955733060836792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,0,0.12175466616948445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,64,0,1,fp8,fp8,0,0.1097920040289561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,0,0.12160533666610718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,0,0.12149866422017415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,0,0.1220746636390686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,128,1,fp8,fp8,0,0.10977066556612651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,0,0.12144533793131511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,64,0,1,fp8,fp8,0,0.11179733276367188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,0,1,float16,float16,0,0.06869333485762279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,128,1,float16,fp8,0,0.06855466465155284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,0,0.2262399991353353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,0,1,float16,fp8,0,0.0685280015071233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,0,1,fp8,fp8,0,0.06420266628265381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,0,0.0684746652841568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,0,0.06834666430950165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,0,0.06834133466084798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,128,1,fp8,fp8,0,0.0643146683772405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,0,0.06846400101979573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,64,0,1,fp8,fp8,0,0.06433066725730896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,0,0.06806399921576183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,0,0.06855466465155284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,128,1,fp8,fp8,0,0.06224533418814341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,0,0.06868266562620799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,0,1,fp8,fp8,0,0.06320533156394958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,0,0.06853333115577698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,0,0.06818133095900218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,0,0.06849066913127899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,128,1,fp8,fp8,0,0.0625493327776591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,0,0.06823466718196869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,64,0,1,fp8,fp8,0,0.06398933132489522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,128,1,float16,float16,0,0.0415786678592364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,128,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,128,1,fp8,fp8,0,0.03942399968703588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,0,1,float16,fp8,0,0.041802664597829185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,64,0,1,fp8,fp8,0,0.039690665900707245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,0,0.04192000130812327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,0,0.04159466673930486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,128,1,float16,float16,0,0.06828799843788147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,0,0.04188799858093262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,0,1,fp8,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,0,0.042090664307276406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,0,0.04160533348719279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,0,0.042768001556396484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,128,1,fp8,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,0,0.042037333051363625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,64,0,1,fp8,fp8,0,0.03951466580231985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,64,128,1,fp8,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,0,0.04155199974775314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,0,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,128,1,fp8,fp8,0,0.03841066608826319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,0,0.043568000197410583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,64,0,1,fp8,fp8,0,0.0386559988061587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,128,1,float16,float16,0,0.029311999678611755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,128,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,0,1,float16,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,0,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,0,0.029578665892283123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,0,0.03054933249950409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,128,1,fp8,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,64,128,1,fp8,fp8,0,0.06273599962393443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,0,0.06929066777229309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,0,0.02937600016593933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,0,0.02940800040960312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,128,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,0,0.02777066578467687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,64,0,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,0,0.029578665892283123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,128,1,fp8,fp8,0,0.027664000789324444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,64,0,1,fp8,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,128,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,128,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,0,0.027829334139823914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,0,1,fp8,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,64,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,64,0,1,fp8,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,0,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,0,0.02091199904680252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,64,128,1,float16,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,128,1,float16,fp8,0,0.019573333362738293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,128,1,fp8,fp8,0,0.01807466646035512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,0,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,64,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,64,128,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,64,128,1,fp8,fp8,0,0.020026666422684986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,128,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,128,1,fp8,fp8,0,0.01806933308641116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,0,0.018538666268189747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,64,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,128,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,64,128,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,64,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,float16,0,3.3968960444132485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,fp8,0,3.430591901143392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,128,1,fp8,fp8,0,3.1319573720296225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,float16,0,3.4415839513142905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,float16,0,21.759503682454426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,fp8,0,21.750958760579426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,fp8,0,3.4718399047851562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,0,1,fp8,fp8,0,19.875408172607422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,128,1,fp8,fp8,0,3.1822293599446616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,float16,0,3.4652907053629556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,float16,0,21.808385213216145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,fp8,0,3.4864158630371094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,128,1,fp8,fp8,0,3.200394630432129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,0,1,fp8,fp8,0,19.919600168863933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,fp8,0,21.838895161946613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,float16,0,3.4989226659139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,float16,0,21.89476776123047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,fp8,0,3.518965403238932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,128,1,fp8,fp8,0,3.242426554361979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,fp8,0,21.848286946614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,0,1,fp8,fp8,0,19.942799886067707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,float16,0,2.0146826108296714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,float16,0,21.906692504882812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,fp8,0,2.0567092895507812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,128,1,fp8,fp8,0,1.9271893501281738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,float16,0,11.338602701822916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,fp8,0,21.89232635498047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,0,1,fp8,fp8,0,19.976890563964844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,float16,0,1.7765812873840332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,fp8,0,1.7899573644002278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,fp8,0,11.36532211303711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,128,1,fp8,fp8,0,1.6376320521036785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,0,1,fp8,fp8,0,10.355685551961264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,float16,0,1.7819573084513347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,float16,0,10.997520446777344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,fp8,0,1.7939732869466145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,128,1,fp8,fp8,0,1.6446560223897297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,fp8,0,11.030298868815104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,0,1,fp8,fp8,0,10.039695739746094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,float16,0,1.7877705891927083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,float16,0,11.033317565917969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,fp8,0,1.8039679527282715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,0,1,fp8,fp8,0,10.06273078918457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,128,1,fp8,fp8,0,1.6550079981486003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,fp8,0,11.021775563557943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,float16,0,1.8016266822814941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,float16,0,11.021984100341797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,0,1,fp8,fp8,0,10.085018793741861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,fp8,0,11.048650105794271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,fp8,0,1.8186826705932617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,128,1,fp8,fp8,0,1.672602653503418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,float16,0,11.05447514851888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,float16,0,1.101253350575765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,fp8,0,1.1257867018381755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,128,1,fp8,fp8,0,1.063109318415324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,fp8,0,11.077200571695963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,0,1,fp8,fp8,0,10.089029312133789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,float16,0,5.8067200978597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,float16,0,0.9879840215047201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,fp8,0,0.9945013523101807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,fp8,0,5.840202967325847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,128,1,fp8,fp8,0,0.9207253456115723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,0,1,fp8,fp8,0,5.324405352274577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,float16,0,0.9933173656463623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,float16,0,5.6606292724609375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,fp8,0,0.9978079795837402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,128,1,fp8,fp8,0,0.9256853262583414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,fp8,0,5.675322850545247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,float16,0,5.66541862487793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,0,1,fp8,fp8,0,5.179605484008789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,float16,0,0.9944053490956625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,fp8,0,1.002021312713623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,0,1,fp8,fp8,0,5.1918026606241865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,128,1,fp8,fp8,0,0.9303092956542969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,fp8,0,5.6719411214192705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,float16,0,1.0030399958292644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,fp8,0,5.686261494954427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,0,1,fp8,fp8,0,5.186586697896321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,float16,0,5.6720428466796875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,fp8,0,1.01146133740743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,128,1,fp8,fp8,0,0.9385493596394857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,float16,0,0.7711573441823324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,float16,0,5.677344004313151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,fp8,0,0.7703306674957275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,0,1,fp8,fp8,0,5.203455924987793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,float16,0,3.1784852345784507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,0,1,fp8,fp8,0,2.9089813232421875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,fp8,0,3.182490666707357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,float16,0,0.7712533473968506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,fp8,0,0.7712000211079916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,float16,0,3.1619787216186523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,fp8,0,3.162282625834147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,0,1,fp8,fp8,0,2.9044532775878906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,fp8,0,5.693887710571289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,float16,0,0.7697546482086182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,fp8,0,0.7707680066426595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,float16,0,3.1556053161621094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,fp8,0,3.1615091959635415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,0,1,fp8,fp8,0,2.9020214080810547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,128,1,fp8,fp8,0,0.7264320055643717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,float16,0,0.7706453005472819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,fp8,0,0.7705173492431641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,128,1,fp8,fp8,0,0.7268693447113037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,fp8,0,3.1628214518229165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,float16,0,0.7693440119425455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,0,1,fp8,fp8,0,2.901050567626953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,float16,0,3.164837201436361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,fp8,0,0.7697707017262777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,float16,0,3.168538729349772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,128,1,fp8,fp8,0,0.7259946664174398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,fp8,0,3.1613918940226235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,float16,0,2.5363094011942544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,0,1,fp8,fp8,0,2.9062506357828775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,fp8,0,2.5559627215067544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,128,1,fp8,fp8,0,2.322751998901367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,128,1,fp8,fp8,0,0.7318080266316732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,float16,0,2.5432960192362466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,float16,0,12.753946940104166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,fp8,0,2.561258633931478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,fp8,0,12.792203267415365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,0,1,fp8,fp8,0,11.652511596679688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,128,1,fp8,fp8,0,2.346970717112223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,float16,0,2.5587199529012046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,float16,0,12.801813761393229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,fp8,0,2.580362637837728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,128,1,fp8,fp8,0,2.3666186332702637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,0,1,fp8,fp8,0,11.677300771077475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,fp8,0,12.808128356933594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,float16,0,2.5789920488993325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,float16,0,12.790959676106771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,fp8,0,2.599658648173014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,128,1,fp8,fp8,0,2.3925013542175293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,0,1,fp8,fp8,0,11.698596954345703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,fp8,0,12.849168141682943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,float16,0,1.5113439559936523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,float16,0,12.845589955647787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,fp8,0,1.539914608001709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,128,1,fp8,fp8,0,1.4433333079020183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,float16,0,6.726074854532878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,0,1,fp8,fp8,0,11.728047688802084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,fp8,0,12.876687367757162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,float16,0,1.3315733273824055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,fp8,0,1.3417387008666992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,fp8,0,6.747754414876302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,128,1,fp8,fp8,0,1.2304800351460774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,float16,0,6.491717020670573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,0,1,fp8,fp8,0,6.161781311035156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,float16,0,1.3384693463643391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,fp8,0,6.503893534342448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,fp8,0,1.347541332244873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,0,1,fp8,fp8,0,5.927808125813802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,128,1,fp8,fp8,0,1.2364853223164876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,float16,0,1.3406987190246582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,float16,0,6.507520039876302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,0,1,fp8,fp8,0,5.95469856262207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,fp8,0,1.3558026949564617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,128,1,fp8,fp8,0,1.244704008102417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,float16,0,6.509189605712891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,fp8,0,6.5118452707926435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,float16,0,1.347317377726237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,fp8,0,1.3656105995178223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,0,1,fp8,fp8,0,5.954405466715495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,fp8,0,6.529770533243815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,float16,0,6.528448104858398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,float16,0,0.8277653058369955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,128,1,fp8,fp8,0,1.2566719849904378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,fp8,0,0.8478506406148275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,128,1,fp8,fp8,0,0.8033173084259033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,0,1,fp8,fp8,0,5.970858891805013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,fp8,0,6.541343688964844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,float16,0,3.4668105443318686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,float16,0,0.7452426751454672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,fp8,0,0.750986655553182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,fp8,0,3.490426699320475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,128,1,fp8,fp8,0,0.6957440376281738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,float16,0,3.3611679077148438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,float16,0,0.7478079795837402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,0,1,fp8,fp8,0,3.1990931828816733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,fp8,0,0.7550933361053467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,float16,0,3.3709707260131836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,128,1,fp8,fp8,0,0.7008960247039795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,0,1,fp8,fp8,0,3.0841280619303384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,float16,0,0.7527306874593099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,fp8,0,3.375887870788574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,fp8,0,0.7573813597361246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,fp8,0,3.3700691858927407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,128,1,fp8,fp8,0,0.702672004699707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,float16,0,3.3749119440714517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,0,1,fp8,fp8,0,3.090794563293457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,fp8,0,3.381797472635905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,0,1,fp8,fp8,0,3.0878400802612305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,fp8,0,0.7629653612772623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,128,1,fp8,fp8,0,0.7085813681284586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,float16,0,0.7565013567606608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,float16,0,0.584330677986145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,fp8,0,3.3926560084025064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,float16,0,1.942911942799886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,float16,0,3.387141227722168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,128,1,fp8,fp8,0,0.5518293380737305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,fp8,0,1.9450720151265461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,0,1,fp8,fp8,0,3.093125343322754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,0,1,fp8,fp8,0,1.7819147109985352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,float16,0,0.5835253397623698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,fp8,0,0.5828213294347128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,fp8,0,0.5849920113881429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,128,1,fp8,fp8,0,0.5516800085703532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,float16,0,1.9318079948425293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,float16,0,0.5842613379160563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,fp8,0,1.9359679222106934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,fp8,0,0.5846773386001587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,128,1,fp8,fp8,0,0.5513226588567098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,float16,0,1.9315147399902344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,float16,0,0.584442655245463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,fp8,0,1.93450133005778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,0,1,fp8,fp8,0,1.7846239407857258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,0,1,fp8,fp8,0,1.7786614100138347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,fp8,0,0.585098663965861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,128,1,fp8,fp8,0,0.5525173346201578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,float16,0,1.932106653849284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,float16,0,0.5849706729253134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,0,1,fp8,fp8,0,1.7816640535990398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,fp8,0,0.5851146777470907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,float16,0,1.9363412857055664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,fp8,0,1.9388160705566406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,0,1,fp8,fp8,0,1.7812533378601074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,fp8,0,1.9323466618855794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,128,1,fp8,fp8,0,0.5516000191370646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,float16,0,2.107466697692871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,fp8,0,2.1203039487202964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,128,1,fp8,fp8,0,1.9305920600891113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,float16,0,2.112160046895345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,float16,0,9.16647974650065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,fp8,0,9.179951985677084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,0,1,fp8,fp8,0,8.367210388183594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,fp8,0,2.128111998240153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,128,1,fp8,fp8,0,1.9459840456644695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,float16,0,9.186261494954428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,float16,0,2.1225706736246743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,fp8,0,2.1408106486002603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,0,1,fp8,fp8,0,8.367237091064453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,fp8,0,9.190298716227213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,128,1,fp8,fp8,0,1.9578293164571126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,float16,0,2.1397172609965005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,float16,0,9.20315170288086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,fp8,0,2.160389264424642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,0,1,fp8,fp8,0,8.392394383748373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,fp8,0,9.209440231323242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,128,1,fp8,fp8,0,1.9820319811503093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,float16,0,9.2227414449056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,float16,0,1.257690668106079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,fp8,0,1.2830719947814941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,128,1,fp8,fp8,0,1.205957333246867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,fp8,0,9.241839726765951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,float16,0,4.854842821756999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,0,1,fp8,fp8,0,8.415525436401367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,float16,0,1.1123092969258626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,fp8,0,1.1218079725901287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,0,1,fp8,fp8,0,4.452255884806315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,fp8,0,4.885999997456868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,128,1,fp8,fp8,0,1.0303093592325847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,float16,0,4.669861475626628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,float16,0,1.1148213545481365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,fp8,0,1.1249386469523113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,0,1,fp8,fp8,0,4.279391924540202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,fp8,0,4.687909444173177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,128,1,fp8,fp8,0,1.0344106356302898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,float16,0,4.672959963480632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,float16,0,1.1214666366577148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,fp8,0,1.1302399635314941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,0,1,fp8,fp8,0,4.281903902689616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,128,1,fp8,fp8,0,1.0400853157043457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,float16,0,4.682965278625488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,float16,0,1.1267200311024983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,fp8,0,4.697530746459961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,0,1,fp8,fp8,0,4.291226704915364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,fp8,0,4.698431968688965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,fp8,0,1.1403199831644695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,128,1,fp8,fp8,0,1.0510986646016438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,float16,0,0.6911466916402181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,float16,0,4.698277473449707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,fp8,0,0.7077279885609945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,fp8,0,4.715706825256348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,float16,0,2.541541258494059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,128,1,fp8,fp8,0,0.6710933049519857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,float16,0,0.6214720010757446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,fp8,0,2.541264057159424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,0,1,fp8,fp8,0,4.29965337117513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,fp8,0,0.6277973254521688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,float16,0,2.4352426528930664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,0,1,fp8,fp8,0,2.331823984781901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,fp8,0,2.44486395517985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,0,1,fp8,fp8,0,2.2385387420654297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,float16,0,0.6237173477808634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,128,1,fp8,fp8,0,0.5826773246129354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,128,1,fp8,fp8,0,0.5847146511077881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,float16,0,2.4430294036865234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,float16,0,0.6274933417638143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,fp8,0,2.4462879498799643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,0,1,fp8,fp8,0,2.2413973808288574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,fp8,0,0.6291466554005941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,fp8,0,0.6318773428599039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,128,1,fp8,fp8,0,0.5889173348744711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,float16,0,0.6317333380381266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,fp8,0,2.451589266459147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,fp8,0,0.6363093455632528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,float16,0,2.4547093709309897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,128,1,fp8,fp8,0,0.5933813254038492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,float16,0,2.4475839932759604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,float16,0,0.488261342048645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,0,1,fp8,fp8,0,2.2446667353312173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,fp8,0,2.4617387453715005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,fp8,0,0.4887839953104655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,0,1,fp8,fp8,0,2.247370719909668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,float16,0,1.4358612696329753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,128,1,fp8,fp8,0,0.46161067485809326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,float16,0,0.4865386486053467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,fp8,0,1.4385493596394856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,fp8,0,0.48795199394226074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,float16,0,1.4305599530537922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,128,1,fp8,fp8,0,0.46107200781504315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,float16,0,0.4861760139465332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,fp8,0,1.4270399411519368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,fp8,0,0.48679999510447186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,float16,0,1.4292640686035156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,128,1,fp8,fp8,0,0.46167465051015216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,fp8,0,1.4299359321594238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,0,1,fp8,fp8,0,1.3193386395772297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,0,1,fp8,fp8,0,1.321290651957194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,float16,0,0.48686401049296063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,fp8,0,0.48845867315928143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,0,1,fp8,fp8,0,1.3214133580525715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,float16,0,1.4304745992024739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,float16,0,0.48795731862386066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,fp8,0,1.4323466618855794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,0,1,fp8,fp8,0,1.3195040225982666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,fp8,0,0.48676268259684247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,128,1,fp8,fp8,0,0.46137066682179767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,float16,0,1.4310933748881023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,128,1,fp8,fp8,0,0.4598453442255656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,fp8,0,1.4356320699055989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,float16,0,3.2965014775594077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,0,1,fp8,fp8,0,1.3192959626515706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,128,1,fp8,fp8,0,3.0341545740763345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,fp8,0,3.3254880905151367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,float16,0,3.3421548207600913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,float16,0,12.195621490478516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,fp8,0,12.200363159179688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,0,1,fp8,fp8,0,11.106688181559244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,fp8,0,3.363642692565918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,128,1,fp8,fp8,0,3.0817492802937827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,float16,0,12.2444699605306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,float16,0,3.3572746912638345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,fp8,0,3.381162643432617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,0,1,fp8,fp8,0,11.169381459554037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,fp8,0,12.24990463256836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,128,1,fp8,fp8,0,3.1059465408325195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,float16,0,3.3906240463256836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,float16,0,12.276480356852213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,fp8,0,3.41920534769694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,0,1,fp8,fp8,0,11.19430414835612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,fp8,0,12.283018747965494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,128,1,fp8,fp8,0,3.148106575012207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,float16,0,1.925615946451823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,float16,0,12.31393559773763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,fp8,0,1.9596373240152996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,float16,0,6.449077606201172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,0,1,fp8,fp8,0,11.224591573079428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,fp8,0,12.33828862508138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,128,1,fp8,fp8,0,1.8351945877075195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,float16,0,1.677120049794515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,fp8,0,6.487018585205078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,0,1,fp8,fp8,0,5.909936269124349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,128,1,fp8,fp8,0,1.548741340637207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,float16,0,6.137653350830078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,fp8,0,1.6964267094930012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,float16,0,1.6827786763509114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,fp8,0,6.155295689900716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,0,1,fp8,fp8,0,5.624895731608073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,128,1,fp8,fp8,0,1.5540374120076497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,float16,0,6.158202489217122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,fp8,0,1.6982879638671875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,float16,0,1.693328062693278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,fp8,0,6.17741330464681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,0,1,fp8,fp8,0,5.642266591389974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,128,1,fp8,fp8,0,1.5646719932556152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,float16,0,6.162458419799805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,fp8,0,1.7084800402323406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,float16,0,1.7064959208170574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,fp8,0,1.7227733929951985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,0,1,fp8,fp8,0,5.640522638956706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,128,1,fp8,fp8,0,1.5819093386332195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,float16,0,6.193146387736003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,fp8,0,6.184885025024414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,float16,0,1.0028800169626872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,fp8,0,1.0276532967885335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,128,1,fp8,fp8,0,0.9677973588307699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,float16,0,3.29200013478597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,0,1,fp8,fp8,0,5.643909454345703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,float16,0,0.8894293308258057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,fp8,0,3.3127358754475913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,0,1,fp8,fp8,0,3.0280319849650064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,fp8,0,0.8969439665476481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,float16,0,3.1512959798177085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,fp8,0,6.220160166422526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,fp8,0,3.1581761042277017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,128,1,fp8,fp8,0,0.8260107040405273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,0,1,fp8,fp8,0,2.881216049194336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,fp8,0,0.9005333582560221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,128,1,fp8,fp8,0,0.8297013441721598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,float16,0,3.1567198435465493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,float16,0,0.893285353978475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,float16,0,0.8943946361541748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,fp8,0,3.163818677266439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,fp8,0,0.9057546456654867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,float16,0,3.1669333775838218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,0,1,fp8,fp8,0,2.8854506810506186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,fp8,0,3.1715307235717773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,128,1,fp8,fp8,0,0.8355093002319336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,0,1,fp8,fp8,0,2.8957014083862305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,fp8,0,0.9123520056406657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,128,1,fp8,fp8,0,0.8447413444519043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,float16,0,3.1738932927449546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,float16,0,0.904853343963623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,float16,0,0.5544000069300333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,fp8,0,3.182719866434733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,0,1,fp8,fp8,0,2.903866767883301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,fp8,0,0.5666133165359497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,float16,0,1.7233600616455078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,float16,0,0.500656008720398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,fp8,0,1.7413013776143391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,fp8,0,0.5072319904963175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,float16,0,1.6574506759643555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,128,1,fp8,fp8,0,0.47144532203674316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,128,1,fp8,fp8,0,0.5402986605962118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,float16,0,0.5017439921696981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,fp8,0,1.6612586975097656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,fp8,0,0.5066239833831787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,float16,0,1.6612265904744465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,128,1,fp8,fp8,0,0.4723626772562663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,0,1,fp8,fp8,0,1.5978399912516277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,float16,0,0.505898674329122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,float16,0,1.668058713277181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,fp8,0,1.6638347307840984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,fp8,0,0.508458654085795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,128,1,fp8,fp8,0,0.4740693171819051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,fp8,0,1.6729973157246907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,0,1,fp8,fp8,0,1.5249439875284831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,0,1,fp8,fp8,0,1.5276320775349934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,float16,0,0.5076693296432495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,fp8,0,0.5130560000737509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,float16,0,1.6707199414571126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,128,1,fp8,fp8,0,0.4800693194071452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,0,1,fp8,fp8,0,1.5269920031229656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,float16,0,0.39585598309834796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,fp8,0,1.6760640144348145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,fp8,0,0.3952159881591797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,128,1,fp8,fp8,0,0.37458133697509766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,fp8,0,1.0060213406880696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,0,1,fp8,fp8,0,0.9251733620961508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,float16,0,0.3924853404362996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,0,1,fp8,fp8,0,1.5310400327046711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,fp8,0,0.3924266497294108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,128,1,fp8,fp8,0,0.37331199645996094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,float16,0,1.0048426787058513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,fp8,0,0.9991093476613363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,float16,0,0.39453331629435223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,0,1,fp8,fp8,0,0.9226293563842773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,fp8,0,0.39452799161275226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,128,1,fp8,fp8,0,0.3728319803873698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,float16,0,0.9982986450195312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,fp8,0,0.999567985534668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,float16,0,0.3946666717529297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,float16,0,1.0004479885101318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,128,1,fp8,fp8,0,0.37236801783243817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,float16,0,1.0002559820810955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,fp8,0,0.9985386530558268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,0,1,fp8,fp8,0,0.9247573216756185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,float16,0,0.394159992535909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,fp8,0,0.3947840134302775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,float16,0,1.00328532854716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,128,1,fp8,fp8,0,0.3741226593653361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,fp8,0,1.0036373138427734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,0,1,fp8,fp8,0,0.9237066904703776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,fp8,0,0.39531199137369794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,float16,0,2.4545599619547525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,0,1,fp8,fp8,0,0.9234933058420817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,fp8,0,2.4745705922444663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,128,1,fp8,fp8,0,2.250149408976237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,float16,0,7.336218516031901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,float16,0,2.4625066121419272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,fp8,0,2.4831786155700684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,fp8,0,7.347408294677734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,0,1,fp8,fp8,0,6.696570714314778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,128,1,fp8,fp8,0,2.274288018544515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,float16,0,7.3506825764973955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,float16,0,2.4780853589375815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,0,1,fp8,fp8,0,6.718218485514323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,fp8,0,7.361114501953125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,fp8,0,2.4989493687947593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,128,1,fp8,fp8,0,2.292191982269287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,float16,0,7.367082595825195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,float16,0,2.499962647755941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,0,1,fp8,fp8,0,6.73683230082194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,fp8,0,7.383525212605794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,fp8,0,2.5204052925109863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,128,1,fp8,fp8,0,2.3194079399108887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,float16,0,7.413349151611328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,float16,0,1.439743995666504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,fp8,0,1.4687093098958333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,0,1,fp8,fp8,0,6.762837092081706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,128,1,fp8,fp8,0,1.376032034556071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,float16,0,3.9319626490275064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,float16,0,1.2589279810587566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,fp8,0,7.42962646484375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,fp8,0,3.9648427963256836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,fp8,0,1.2699039777119954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,128,1,fp8,fp8,0,1.1651039918263753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,float16,0,3.72159481048584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,0,1,fp8,fp8,0,3.621114730834961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,float16,0,1.2637279828389485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,fp8,0,3.7281974156697593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,0,1,fp8,fp8,0,3.4055360158284507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,fp8,0,1.2746559778849285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,128,1,fp8,fp8,0,1.1694133281707764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,float16,0,3.727754592895508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,float16,0,1.2679786682128906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,0,1,fp8,fp8,0,3.409455935160319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,fp8,0,1.2807306448618572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,128,1,fp8,fp8,0,1.1763520240783691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,float16,0,3.7386932373046875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,fp8,0,3.744373321533203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,float16,0,1.2795626322428386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,fp8,0,3.7446187337239585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,0,1,fp8,fp8,0,3.414149284362793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,fp8,0,1.2922399838765461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,128,1,fp8,fp8,0,1.1886560122172039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,float16,0,0.7582453091939291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,float16,0,3.759157180786133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,fp8,0,0.774890661239624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,fp8,0,3.7669652303059897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,0,1,fp8,fp8,0,3.430682818094889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,float16,0,2.0211946169535318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,128,1,fp8,fp8,0,0.7313386599222819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,float16,0,0.6722186406453451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,fp8,0,2.0381174087524414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,fp8,0,0.6772053241729736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,float16,0,1.923866589864095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,128,1,fp8,fp8,0,0.6243573427200317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,fp8,0,1.9287412961324055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,0,1,fp8,fp8,0,1.7603146235148113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,0,1,fp8,fp8,0,1.8704533576965332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,float16,0,0.6734986305236816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,fp8,0,0.679375966389974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,128,1,fp8,fp8,0,0.6284213463465372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,float16,0,1.924224058787028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,float16,0,0.6767093340555826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,fp8,0,1.9334452946980794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,fp8,0,0.6826666990915934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,128,1,fp8,fp8,0,0.631498654683431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,float16,0,1.9282666842142742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,float16,0,0.6809493700663248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,fp8,0,1.937594731648763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,0,1,fp8,fp8,0,1.7672640482584636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,fp8,0,0.6882293224334717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,128,1,fp8,fp8,0,0.6397440036137899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,float16,0,1.9358986218770344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,float16,0,0.42128535111745197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,fp8,0,1.9481706619262695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,0,1,fp8,fp8,0,1.770527998606364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,float16,0,1.0729173024495442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,128,1,fp8,fp8,0,0.41274134318033856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,fp8,0,1.0824693044026692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,0,1,fp8,fp8,0,0.9993173281351725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,float16,0,0.3797333240509033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,0,1,fp8,fp8,0,1.7761599222819011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,float16,0,1.0257173379262288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,128,1,fp8,fp8,0,0.35922666390736896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,fp8,0,1.0269013245900471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,0,1,fp8,fp8,0,0.9442826906840006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,float16,0,0.38020265102386475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,float16,0,1.025760014851888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,128,1,fp8,fp8,0,0.35957332452138263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,fp8,0,1.0294613043467205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,0,1,fp8,fp8,0,0.947370688120524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,fp8,0,0.3842879931131999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,float16,0,0.38472533226013184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,fp8,0,0.4331413507461548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,fp8,0,0.38601064682006836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,128,1,fp8,fp8,0,0.3635733524958293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,fp8,0,1.0332533518473308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,float16,0,0.38815466562906903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,float16,0,1.0349067052205403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,fp8,0,0.39031465848286945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,0,1,fp8,fp8,0,0.9495946566263834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,fp8,0,0.38328532377878827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,fp8,0,1.0380746523539226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,0,1,fp8,fp8,0,0.9511786301930746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,float16,0,0.6458133459091187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,128,1,fp8,fp8,0,0.36608533064524335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,128,1,fp8,fp8,0,0.28546667098999023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,fp8,0,0.6453546682993571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,float16,0,0.3011680046717326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,0,1,fp8,fp8,0,0.5984746615091959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,float16,0,0.6415626605351766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,fp8,0,0.29974933465321857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,128,1,fp8,fp8,0,0.2837386727333069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,float16,0,1.0295093059539795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,fp8,0,0.6400266488393148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,float16,0,0.29998934268951416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,0,1,fp8,fp8,0,0.5953653256098429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,fp8,0,0.3020159999529521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,128,1,fp8,fp8,0,0.28487465778986615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,fp8,0,0.6414986848831177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,0,1,fp8,fp8,0,0.5954026778539022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,float16,0,0.2997866670290629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,float16,0,0.2998453378677368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,float16,0,0.6394986708958944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,float16,0,0.6435946623484293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,128,1,fp8,fp8,0,0.28545065720876056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,fp8,0,0.6424853404362997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,0,1,fp8,fp8,0,0.5945440133412679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,float16,0,0.2998826702435811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,float16,0,0.6435039838155111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,128,1,fp8,fp8,0,0.2852533260981242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,fp8,0,0.30214399099349976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,fp8,0,0.30002133051554364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,fp8,0,0.647050658861796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,0,1,fp8,fp8,0,0.5950666666030884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,fp8,0,0.2998773256937663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,float16,0,3.2479092280069985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,fp8,0,3.2661492029825845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,128,1,fp8,fp8,0,2.98305606842041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,float16,0,7.34442138671875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,float16,0,3.289125442504883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,0,1,fp8,fp8,0,6.696159998575847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,fp8,0,3.3113012313842773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,128,1,fp8,fp8,0,3.028256098429362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,fp8,0,7.358943939208984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,float16,0,7.389167785644531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,float16,0,3.305077234903971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,fp8,0,7.416074752807617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,0,1,fp8,fp8,0,6.753136316935222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,fp8,0,3.3282505671183267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,float16,0,7.415706634521484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,128,1,fp8,fp8,0,3.0528907775878906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,float16,0,3.3417867024739585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,fp8,0,7.439706802368164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,0,1,fp8,fp8,0,6.767722447713216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,fp8,0,3.363818804423014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,float16,0,7.458661397298177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,128,1,fp8,fp8,0,3.0947999954223633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,float16,0,1.8792053858439128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,fp8,0,1.9084426561991374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,float16,0,3.9690879185994468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,0,1,fp8,fp8,0,6.816383997599284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,fp8,0,7.472789128621419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,128,1,fp8,fp8,0,1.7859360376993816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,float16,0,1.6289173762003581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,fp8,0,3.9972588221232095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,fp8,0,1.6430773735046387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,float16,0,3.700805346171061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,128,1,fp8,fp8,0,1.4988959630330403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,0,1,fp8,fp8,0,3.6648267110188804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,float16,0,1.637279987335205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,fp8,0,3.702202796936035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,0,1,fp8,fp8,0,3.3775307337443032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,fp8,0,1.6510400772094727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,128,1,fp8,fp8,0,1.5046240488688152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,float16,0,3.6998186111450195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,float16,0,1.6424853006998699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,fp8,0,3.7150932947794595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,0,1,fp8,fp8,0,3.3800319035847983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,fp8,0,1.658357302347819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,128,1,fp8,fp8,0,1.5156373977661133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,float16,0,3.713722546895345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,float16,0,1.6579467455546062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,fp8,0,3.7305758794148765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,0,1,fp8,fp8,0,3.3851518630981445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,fp8,0,1.675605297088623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,128,1,fp8,fp8,0,1.5331679979960124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,float16,0,0.9649226665496826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,fp8,0,3.7413813273111978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,0,1,fp8,fp8,0,3.4114720026652017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,float16,0,3.7318293253580728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,fp8,0,0.9822826385498047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,128,1,fp8,fp8,0,0.9233653545379639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,float16,0,0.8468693097432455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,fp8,0,2.0401867230733237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,fp8,0,0.8514400323232015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,float16,0,1.894437313079834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,128,1,fp8,fp8,0,0.7813440163930258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,0,1,fp8,fp8,0,1.8726612726847331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,fp8,0,1.8972105979919434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,0,1,fp8,fp8,0,1.730415980021159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,float16,0,2.02346134185791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,fp8,0,0.8569866816202799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,128,1,fp8,fp8,0,0.783898671468099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,float16,0,1.9059467315673828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,float16,0,0.8521440029144287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,fp8,0,1.9007306098937988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,0,1,fp8,fp8,0,1.7349440256754558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,float16,0,0.847653309504191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,fp8,0,0.8598612944285074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,float16,0,1.8996586799621582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,fp8,0,1.9035520553588867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,float16,0,0.8606186707814535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,0,1,fp8,fp8,0,1.7362826665242512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,fp8,0,0.8689706325531006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,128,1,fp8,fp8,0,0.7993919849395752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,128,1,fp8,fp8,0,0.7890613079071045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,float16,0,0.5101120074590048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,fp8,0,1.9191093444824219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,float16,0,1.0508106549580891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,fp8,0,0.5221759875615438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,128,1,fp8,fp8,0,0.49319998423258465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,float16,0,1.9066720008850098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,fp8,0,1.0635626316070557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,0,1,fp8,fp8,0,0.9820799827575684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,0,1,fp8,fp8,0,1.7474239667256672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,128,1,fp8,fp8,0,0.42310933272043866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,fp8,0,0.9893013636271158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,float16,0,0.4516426722208659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,0,1,fp8,fp8,0,0.9089226722717285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,float16,0,0.9895146687825521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,fp8,0,0.4573226769765218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,float16,0,0.9891253312428793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,128,1,fp8,fp8,0,0.42499732971191406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,fp8,0,0.9935839970906576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,0,1,fp8,fp8,0,0.9122560024261475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,float16,0,0.4556320110956828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,float16,0,0.4533013502756755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,fp8,0,0.45955200990041095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,128,1,fp8,fp8,0,0.4285600185394287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,fp8,0,0.9953546524047852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,0,1,fp8,fp8,0,0.9134026368459066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,float16,0,0.45953599611918133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,float16,0,0.9970453580220541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,fp8,0,0.4567893346150716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,float16,0,0.9925066630045573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,fp8,0,0.9987733364105225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,0,1,fp8,fp8,0,0.9179893334706625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,float16,0,0.2869759996732076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,fp8,0,0.2938506603240967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,128,1,fp8,fp8,0,0.2816320061683655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,fp8,0,0.46399998664855957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,128,1,fp8,fp8,0,0.4327253500620524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,0,1,fp8,fp8,0,0.5360693136850992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,float16,0,0.25520533323287964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,float16,0,0.5716053247451782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,float16,0,0.5352480014165243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,fp8,0,0.25708266099294025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,128,1,fp8,fp8,0,0.24500799179077148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,fp8,0,0.5783626635869344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,0,1,fp8,fp8,0,0.49911999702453613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,float16,0,0.2572000026702881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,float16,0,0.535919984181722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,fp8,0,0.25700799624125165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,128,1,fp8,fp8,0,0.24458134174346924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,fp8,0,0.5371413230895996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,0,1,fp8,fp8,0,0.5011733373006185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,float16,0,0.25723199049631756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,fp8,0,0.25895466407140094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,fp8,0,0.5368426640828451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,128,1,fp8,fp8,0,0.24736533562342325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,fp8,0,0.5525919993718466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,0,1,fp8,fp8,0,0.500709335009257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,float16,0,0.2624746759732564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,fp8,0,0.26145599285761517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,128,1,fp8,fp8,0,0.24943466981252035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,fp8,0,0.5444586674372355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,0,1,fp8,fp8,0,0.5032159884770712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,float16,0,0.20565332969029745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,float16,0,0.3601599931716919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,float16,0,0.5388213396072388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,128,1,fp8,fp8,0,0.19591466585795084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,fp8,0,0.3616693417231242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,0,1,fp8,fp8,0,0.33697601159413654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,float16,0,0.20185067256291708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,float16,0,0.5429706573486328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,128,1,fp8,fp8,0,0.19328532616297403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,fp8,0,0.35683198769887287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,0,1,fp8,fp8,0,0.3330666621526082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,float16,0,0.20134933789571127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,float16,0,0.3571360111236572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,fp8,0,0.20170666774113974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,128,1,fp8,fp8,0,0.19151467084884644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,fp8,0,0.35567466417948407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,fp8,0,0.20574933290481567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,float16,0,0.2016213337580363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,float16,0,0.3572640021642049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,fp8,0,0.20164799690246582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,128,1,fp8,fp8,0,0.19141334295272827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,fp8,0,0.3566666841506958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,0,1,fp8,fp8,0,0.3312106728553772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,float16,0,0.20381333430608115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,float16,0,0.35868267218271893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,fp8,0,0.2035413384437561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,128,1,fp8,fp8,0,0.19358932971954346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,fp8,0,0.35809600353240967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,0,1,fp8,fp8,0,0.33273067077000934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,fp8,0,0.20323199033737183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,float16,0,2.4165919621785483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,0,1,fp8,fp8,0,0.3328640063603719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,float16,0,0.35733334223429364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,fp8,0,2.436090628306071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,128,1,fp8,fp8,0,2.2072854042053223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,float16,0,4.588608105977376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,float16,0,2.427285353342692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,0,1,fp8,fp8,0,4.188213348388672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,fp8,0,4.604549407958984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,float16,0,4.602047920227051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,fp8,0,2.4501280784606934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,128,1,fp8,fp8,0,2.2340213457743325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,float16,0,2.4443146387736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,fp8,0,4.626688003540039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,0,1,fp8,fp8,0,4.205946604410808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,fp8,0,2.4630026817321777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,128,1,fp8,fp8,0,2.2476159731547036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,float16,0,2.468186696370443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,fp8,0,4.639898618062337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,float16,0,4.63215986887614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,0,1,fp8,fp8,0,4.227317492167155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,128,1,fp8,fp8,0,2.2771093050638833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,float16,0,4.657797177632649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,fp8,0,4.675536155700684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,fp8,0,2.484399954477946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,0,1,fp8,fp8,0,4.256805419921875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,fp8,0,1.4321386019388835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,float16,0,2.5247039794921875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,128,1,fp8,fp8,0,1.3385119438171387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,float16,0,1.4100747108459473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,float16,0,1.2256960074106853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,fp8,0,2.544287999471029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,0,1,fp8,fp8,0,2.3409120241800943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,fp8,0,1.2374560038248699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,128,1,fp8,fp8,0,1.1262933413187664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,fp8,0,2.336319923400879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,float16,0,1.2310826778411865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,0,1,fp8,fp8,0,2.12335999806722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,fp8,0,1.2430240313212078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,128,1,fp8,fp8,0,1.1319680213928223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,float16,0,2.332373301188151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,float16,0,1.2390613555908203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,0,1,fp8,fp8,0,2.131338596343994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,float16,0,2.3243254025777182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,fp8,0,1.2507893244425456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,float16,0,2.339402675628662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,128,1,fp8,fp8,0,1.1405493418375652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,fp8,0,2.3378666241963706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,float16,0,1.250384012858073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,0,1,fp8,fp8,0,2.1379146575927734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,fp8,0,1.260805368423462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,float16,0,2.3489972750345864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,fp8,0,2.3501173655192056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,128,1,fp8,fp8,0,1.1517333189646404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,float16,0,0.7285333474477133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,fp8,0,2.3608320554097495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,fp8,0,0.7437919775644938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,128,1,fp8,fp8,0,0.6958719889322916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,fp8,0,1.3105759620666504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,0,1,fp8,fp8,0,1.20469864209493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,float16,0,0.6382079919179281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,float16,0,1.2956746419270833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,fp8,0,0.6441066662470499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,128,1,fp8,fp8,0,0.5909546613693237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,fp8,0,1.202837308247884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,0,1,fp8,fp8,0,1.0974133014678955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,float16,0,0.6420160134633383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,float16,0,1.2004799842834473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,0,1,fp8,fp8,0,2.1476052602132163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,128,1,fp8,fp8,0,0.5931626558303833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,fp8,0,1.2034026781717937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,0,1,fp8,fp8,0,1.1011733214060466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,float16,0,0.6435199975967407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,fp8,0,0.6480586528778076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,float16,0,1.1949439843495686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,128,1,fp8,fp8,0,0.5980000098546346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,fp8,0,1.2087093194325764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,0,1,fp8,fp8,0,1.1025973161061604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,float16,0,0.650768001874288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,fp8,0,0.6562666495641073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,fp8,0,0.650816003481547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,128,1,fp8,fp8,0,0.603925347328186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,fp8,0,1.2160267035166423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,0,1,fp8,fp8,0,1.109877347946167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,float16,0,0.38863468170166016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,fp8,0,0.3972533146540324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,float16,0,1.205082654953003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,128,1,fp8,fp8,0,0.3739519913991292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,fp8,0,0.6893920103708903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,0,1,fp8,fp8,0,0.6379146575927734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,float16,0,0.34303466478983563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,float16,0,0.6301759878794352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,128,1,fp8,fp8,0,0.32230933507283527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,float16,0,0.6811786492665609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,float16,0,1.210858662923177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,fp8,0,0.6337279876073202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,0,1,fp8,fp8,0,0.5841333468755087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,float16,0,0.34293333689371747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,fp8,0,0.3466879924138387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,128,1,fp8,fp8,0,0.32408533493677777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,fp8,0,0.6355466842651367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,float16,0,0.3462080160776774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,float16,0,0.6334559917449951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,fp8,0,0.348416010538737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,float16,0,0.6313279867172241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,128,1,fp8,fp8,0,0.32739200194676715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,fp8,0,0.6362613439559937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,0,1,fp8,fp8,0,0.5889333486557007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,float16,0,0.3489919900894165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,float16,0,0.6399253209431967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,128,1,fp8,fp8,0,0.33007999261220294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,fp8,0,0.6418186823527018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,fp8,0,0.3452479839324951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,0,1,fp8,fp8,0,0.5909119844436646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,float16,0,0.22023999691009521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,float16,0,0.3758133252461751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,fp8,0,0.22612265745798746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,fp8,0,0.3535626729329427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,128,1,fp8,fp8,0,0.21702933311462402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,fp8,0,0.3799360195795695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,float16,0,0.1955146590868632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,float16,0,0.34860265254974365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,128,1,fp8,fp8,0,0.18729066848754883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,0,1,fp8,fp8,0,0.5865973234176636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,0,1,fp8,fp8,0,0.3266293406486511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,float16,0,0.19753599166870117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,float16,0,0.35068265597025555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,0,1,fp8,fp8,0,0.36630932490030926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,fp8,0,0.19646400213241577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,128,1,fp8,fp8,0,0.18731200695037842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,fp8,0,0.351306676864624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,0,1,fp8,fp8,0,0.3267093300819397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,float16,0,0.19723733266194662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,float16,0,0.349178671836853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,fp8,0,0.19832533597946167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,128,1,fp8,fp8,0,0.18759467204411825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,fp8,0,0.3509440024693807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,0,1,fp8,fp8,0,0.32728532950083417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,float16,0,0.19748800992965698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,float16,0,0.35150933265686035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,fp8,0,0.19689599672953287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,128,1,fp8,fp8,0,0.19258666038513184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,fp8,0,0.3573226531346639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,0,1,fp8,fp8,0,0.33794665336608887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,fp8,0,0.34916265805562335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,float16,0,0.24486400683720908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,fp8,0,0.15997866789499918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,128,1,fp8,fp8,0,0.15245866775512695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,fp8,0,0.24652800957361856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,0,1,fp8,fp8,0,0.23018133640289307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,float16,0,0.1569973329703013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,float16,0,0.15966399510701498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,float16,0,0.24209600687026978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,fp8,0,0.15837333599726358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,128,1,fp8,fp8,0,0.14842666188875833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,fp8,0,0.2435306708017985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,0,1,fp8,fp8,0,0.22614399592081705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,float16,0,0.15638933579126993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,float16,0,0.24121065934499106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,fp8,0,0.1566986640294393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,128,1,fp8,fp8,0,0.1483786702156067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,fp8,0,0.2425653338432312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,0,1,fp8,fp8,0,0.22826667626698813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,float16,0,0.15809067090352377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,float16,0,0.24247999986012778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,128,1,fp8,fp8,0,0.15037866433461508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,fp8,0,0.2432159980138143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,0,1,fp8,fp8,0,0.228218674659729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,float16,0,0.15606932838757834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,float16,0,0.24209600687026978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,fp8,0,0.15654399991035461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,128,1,fp8,fp8,0,0.1483573317527771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,fp8,0,0.24261866013209024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,0,1,fp8,fp8,0,0.2282080054283142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,fp8,0,0.1588266690572103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,fp8,0,0.1987733244895935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,fp8,0,3.248175938924154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,128,1,fp8,fp8,0,2.9447625478108725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,float16,0,3.2321812311808267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,float16,0,4.933861414591472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,fp8,0,4.947205225626628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,0,1,fp8,fp8,0,4.489733378092448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,float16,0,4.9855092366536455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,fp8,0,3.291253407796224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,128,1,fp8,fp8,0,2.9733546574910483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,float16,0,3.280698776245117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,float16,0,3.2825546264648438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,0,1,fp8,fp8,0,4.507930755615234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,fp8,0,4.99015998840332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,fp8,0,3.302501360575358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,128,1,fp8,fp8,0,2.992176055908203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,float16,0,4.990640004475911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,fp8,0,4.999855995178223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,float16,0,3.347018559773763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,0,1,fp8,fp8,0,4.536389350891113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,float16,0,5.065562566121419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,128,1,fp8,fp8,0,3.0260000228881836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,fp8,0,3.345786730448405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,fp8,0,5.0618025461832685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,0,1,fp8,fp8,0,4.574351946512858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,fp8,0,1.8771732648213704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,float16,0,2.7344001134236655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,128,1,fp8,fp8,0,1.7587893803914387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,float16,0,1.8561387062072754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,float16,0,1.6067040761311848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,fp8,0,2.7547998428344727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,0,1,fp8,fp8,0,2.540133317311605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,float16,0,2.460399945576986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,fp8,0,1.6189173062642415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,128,1,fp8,fp8,0,1.470586617787679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,fp8,0,2.474928061167399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,0,1,fp8,fp8,0,2.24290132522583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,float16,0,1.6130560239156086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,float16,0,2.4717493057250977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,fp8,0,1.627855936686198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,fp8,0,2.48691733678182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,float16,0,1.6222933133443196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,0,1,fp8,fp8,0,2.2555200258890786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,128,1,fp8,fp8,0,1.4771679242451985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,fp8,0,1.635503927866618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,float16,0,2.4840906461079917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,float16,0,1.6372586886088054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,fp8,0,2.4942026138305664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,128,1,fp8,fp8,0,1.4880320231119792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,fp8,0,1.650752067565918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,float16,0,2.5008959770202637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,128,1,fp8,fp8,0,1.5058612823486328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,0,1,fp8,fp8,0,2.2604692776997886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,float16,0,0.9445760250091553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,fp8,0,2.5122666358947754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,0,1,fp8,fp8,0,2.282368024190267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,fp8,0,0.9596906503041586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,128,1,fp8,fp8,0,0.8960639635721842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,fp8,0,1.4035305976867676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,0,1,fp8,fp8,0,1.293984015782674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,fp8,0,0.8274827003479004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,float16,0,1.3866400718688965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,128,1,fp8,fp8,0,0.7549599806467692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,fp8,0,1.262880007425944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,float16,0,0.8202826976776123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,0,1,fp8,fp8,0,1.147701342900594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,float16,0,0.8266932964324951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,float16,0,1.2591893672943115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,fp8,0,0.832640012105306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,128,1,fp8,fp8,0,0.7616799672444662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,fp8,0,1.2691573301951091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,0,1,fp8,fp8,0,1.1529920101165771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,float16,0,0.830736001332601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,float16,0,1.258896032969157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,float16,0,1.2664373715718586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,128,1,fp8,fp8,0,0.7646986643473307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,fp8,0,1.2737173239390056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,0,1,fp8,fp8,0,1.1565546989440918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,float16,0,0.8367253144582113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,fp8,0,0.8387200037638346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,fp8,0,0.845690647761027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,128,1,fp8,fp8,0,0.7727200190226237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,fp8,0,1.2816853523254395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,0,1,fp8,fp8,0,1.1650666395823162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,float16,0,0.7293173472086588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,128,1,fp8,fp8,0,0.4707680145899455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,float16,0,1.2727680206298828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,fp8,0,0.7287253538767496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,0,1,fp8,fp8,0,0.6732693513234457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,float16,0,0.4910666545232137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,float16,0,0.4292213519414266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,float16,0,0.6506666739781698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,fp8,0,0.5016746520996094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,fp8,0,0.6565279960632324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,0,1,fp8,fp8,0,0.6014826695124308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,float16,0,0.43109333515167236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,float16,0,0.6541226704915365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,fp8,0,0.43461334705352783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,128,1,fp8,fp8,0,0.4036480188369751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,fp8,0,0.6564160188039144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,128,1,fp8,fp8,0,0.3985280195871989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,float16,0,0.4331680138905843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,float16,0,0.6574720144271851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,fp8,0,0.43756266434987384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,128,1,fp8,fp8,0,0.40328001976013184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,fp8,0,0.6612853209177653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,0,1,fp8,fp8,0,0.6067786614100138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,float16,0,0.4383946657180786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,fp8,0,0.43676265080769855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,0,1,fp8,fp8,0,0.6048266490300497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,float16,0,0.6614826520284017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,128,1,fp8,fp8,0,0.4081546862920125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,fp8,0,0.6667253176371256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,float16,0,0.3842453161875407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,fp8,0,0.26945600907007855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,128,1,fp8,fp8,0,0.2568693359692891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,fp8,0,0.39050666491190594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,0,1,fp8,fp8,0,0.36498133341471356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,float16,0,0.2651679913202922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,float16,0,0.22897066672643027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,float16,0,0.3449546496073405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,fp8,0,0.2323039968808492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,128,1,fp8,fp8,0,0.2181439995765686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,fp8,0,0.34905068079630536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,0,1,fp8,fp8,0,0.32505067189534503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,float16,0,0.23030932744344076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,float16,0,0.3489760160446167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,fp8,0,0.2336906592051188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,128,1,fp8,fp8,0,0.22024534145991007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,fp8,0,0.35115734736124676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,0,1,fp8,fp8,0,0.32655467589696247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,float16,0,0.23273066679636636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,float16,0,0.3490133285522461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,fp8,0,0.23474133014678955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,128,1,fp8,fp8,0,0.22200000286102295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,fp8,0,0.3522293170293172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,0,1,fp8,fp8,0,0.3283466696739197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,float16,0,0.2359573245048523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,fp8,0,0.442415992418925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,float16,0,0.353493332862854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,128,1,fp8,fp8,0,0.22541866699854532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,0,1,fp8,fp8,0,0.6104586521784464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,0,1,fp8,fp8,0,0.331167995929718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,float16,0,0.21746132771174112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,fp8,0,0.15437333782513937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,128,1,fp8,fp8,0,0.15039466818173727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,fp8,0,0.221178670724233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,fp8,0,0.23840532700220743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,float16,0,0.13387733697891235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,float16,0,0.15013333161671957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,fp8,0,0.13377599914868674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,128,1,fp8,fp8,0,0.12549333771069845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,fp8,0,0.19850132862726846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,0,1,fp8,fp8,0,0.1830880045890808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,float16,0,0.13402133186658224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,float16,0,0.19950934251149496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,fp8,0,0.13431466619173685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,128,1,fp8,fp8,0,0.1258240044116974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,fp8,0,0.19932266076405844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,0,1,fp8,fp8,0,0.18451199928919473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,float16,0,0.1339893341064453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,0,1,fp8,fp8,0,0.2079626719156901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,fp8,0,0.13366400202115378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,128,1,fp8,fp8,0,0.12569066882133484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,fp8,0,0.19955732425053915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,0,1,fp8,fp8,0,0.1843679944674174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,float16,0,0.1341653366883596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,float16,0,0.19939200083414713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,fp8,0,0.13613866766293845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,fp8,0,0.3558613459269206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,128,1,fp8,fp8,0,0.12866666913032532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,fp8,0,0.2008906602859497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,0,1,fp8,fp8,0,0.18626666069030762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,float16,0,0.10985599954922994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,float16,0,0.14753599961598715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,fp8,0,0.11045333743095398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,128,1,fp8,fp8,0,0.1074720025062561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,fp8,0,0.14868799845377603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,0,1,fp8,fp8,0,0.14152533809343973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,float16,0,0.11023466785748799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,float16,0,0.14683733383814493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,fp8,0,0.10940800110499065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,128,1,fp8,fp8,0,0.10531733433405559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,float16,0,0.1998186707496643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,fp8,0,0.14667733510335287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,0,1,fp8,fp8,0,0.13940266768137613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,float16,0,0.1113759974638621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,fp8,0,0.1095306674639384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,128,1,fp8,fp8,0,0.10519466797510783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,fp8,0,0.14620799819628397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,0,1,fp8,fp8,0,0.13955733180046082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,float16,0,0.10970667004585266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,float16,0,0.1465173363685608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,fp8,0,0.11127466956774394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,128,1,fp8,fp8,0,0.10683733224868774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,fp8,0,0.14813333749771118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,0,1,fp8,fp8,0,0.14007467031478882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,float16,0,0.1106719970703125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,float16,0,0.14729066689809164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,fp8,0,0.1109226644039154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,128,1,fp8,fp8,0,0.1053013304869334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,fp8,0,0.147189329067866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,0,1,fp8,fp8,0,0.1402239998181661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,float16,0,2.400298595428467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,float16,0,0.14653333028157553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,float16,0,3.2358293533325195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,fp8,0,2.4094346364339194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,128,1,fp8,fp8,0,2.1827146212259927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,float16,0,0.199455996354421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,0,1,fp8,fp8,0,2.9428160985310874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,fp8,0,3.247621218363444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,fp8,0,2.443888028462728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,float16,0,2.4321600596110025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,128,1,fp8,fp8,0,2.204005400339762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,float16,0,3.269551912943522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,fp8,0,3.2808427810668945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,0,1,fp8,fp8,0,2.9612318674723306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,float16,0,2.4568907419840493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,fp8,0,2.4608640670776367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,128,1,fp8,fp8,0,2.2236053148905435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,fp8,0,3.294346809387207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,float16,0,2.4813440640767417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,0,1,fp8,fp8,0,2.979733467102051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,float16,0,3.2979841232299805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,float16,0,3.3168748219807944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,fp8,0,2.4790239334106445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,fp8,0,3.31553586324056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,float16,0,1.395301342010498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,0,1,fp8,fp8,0,2.9981120427449546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,float16,0,1.831007957458496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,128,1,fp8,fp8,0,1.3163786729176838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,fp8,0,1.847274621327718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,0,1,fp8,fp8,0,1.7018720308939617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,fp8,0,1.411743958791097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,float16,0,1.205509344736735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,fp8,0,1.2126719951629639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,fp8,0,1.6364906628926594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,float16,0,1.6246347427368164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,0,1,fp8,fp8,0,1.4822293917338054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,float16,0,1.2119306723276775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,128,1,fp8,fp8,0,2.247173309326172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,float16,0,1.634885311126709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,fp8,0,1.2197013696034749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,128,1,fp8,fp8,0,1.1100906531016033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,128,1,fp8,fp8,0,1.102778673171997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,fp8,0,1.645525296529134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,0,1,fp8,fp8,0,1.489456017812093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,float16,0,1.6403412818908691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,128,1,fp8,fp8,0,1.117749293645223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,fp8,0,1.6505333582560222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,0,1,fp8,fp8,0,1.4978987375895183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,float16,0,1.2188746929168701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,float16,0,1.228277365366618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,fp8,0,1.2399946848551433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,128,1,fp8,fp8,0,1.1302719910939534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,fp8,0,1.664229393005371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,0,1,fp8,fp8,0,1.5112214088439941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,fp8,0,1.2292533715565999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,float16,0,0.7115893363952637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,float16,0,0.9338186581929525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,128,1,fp8,fp8,0,0.7001386483510336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,fp8,0,0.9457866350809733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,0,1,fp8,fp8,0,0.870352029800415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,float16,0,0.6186613241831461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,float16,0,0.8433813254038492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,fp8,0,0.6239946683247884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,fp8,0,0.7218506336212158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,128,1,fp8,fp8,0,0.5704160133997599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,fp8,0,0.8364373048146566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,0,1,fp8,fp8,0,0.7626826763153076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,float16,0,0.6227893431981405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,fp8,0,0.6281919876734415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,128,1,fp8,fp8,0,0.5920906861623129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,float16,0,1.6507466634114583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,fp8,0,0.8406293392181396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,0,1,fp8,fp8,0,0.7657012939453125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,float16,0,0.6386666695276896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,float16,0,0.8358666896820068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,fp8,0,0.631658673286438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,128,1,fp8,fp8,0,0.5768266518910726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,fp8,0,0.8461759885152181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,0,1,fp8,fp8,0,0.7708160082499186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,float16,0,0.6309333244959513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,float16,0,0.8447840213775635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,fp8,0,0.6369866530100504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,128,1,fp8,fp8,0,0.596725344657898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,float16,0,0.37198932965596515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,0,1,fp8,fp8,0,0.7808000246683756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,float16,0,0.4854346513748169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,fp8,0,0.37930134932200116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,128,1,fp8,fp8,0,0.3580533266067505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,fp8,0,0.49466665585835773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,0,1,fp8,fp8,0,0.4599519968032837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,float16,0,0.32292266686757404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,float16,0,0.4351360003153483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,fp8,0,0.326474666595459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,fp8,0,0.8513546784718832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,fp8,0,0.43911465009053546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,0,1,fp8,fp8,0,0.4036053419113159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,float16,0,0.32499200105667114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,float16,0,0.4331253369649251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,fp8,0,0.32797332604726154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,128,1,fp8,fp8,0,0.30618133147557575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,float16,0,0.8397226333618164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,0,1,fp8,fp8,0,0.406330664952596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,float16,0,0.32622400919596356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,128,1,fp8,fp8,0,0.30405332644780475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,fp8,0,0.32895465691884357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,128,1,fp8,fp8,0,0.308186670144399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,fp8,0,0.4412746826807658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,0,1,fp8,fp8,0,0.4079626798629761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,float16,0,0.33018134037653607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,float16,0,0.4416373173395793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,fp8,0,0.3336160182952881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,128,1,fp8,fp8,0,0.31152000029881793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,fp8,0,0.4456319808959961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,0,1,fp8,fp8,0,0.4116640090942383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,float16,0,0.20246932903925577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,float16,0,0.26316799720128375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,fp8,0,0.20658133427302042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,128,1,fp8,fp8,0,0.19669866561889648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,fp8,0,0.2688213388125102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,0,1,fp8,fp8,0,0.25235732396443683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,float16,0,0.1734399994214376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,float16,0,0.23263466358184814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,fp8,0,0.17492800951004028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,128,1,fp8,fp8,0,0.1678239901860555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,fp8,0,0.2420533299446106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,0,1,fp8,fp8,0,0.22158400217692056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,float16,0,0.1731520096460978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,float16,0,0.23244265715281168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,fp8,0,0.17498666048049927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,128,1,fp8,fp8,0,0.16676799456278482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,fp8,0,0.23512534300486246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,0,1,fp8,fp8,0,0.22105065981547037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,float16,0,0.17417067289352417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,float16,0,0.23412799835205078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,fp8,0,0.1769919991493225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,128,1,fp8,fp8,0,0.17093332608540854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,fp8,0,0.23656533161799112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,0,1,fp8,fp8,0,0.22414400180180868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,float16,0,0.1772800087928772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,float16,0,0.43828801314036053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,fp8,0,0.17968533436457315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,128,1,fp8,fp8,0,0.1728960076967875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,fp8,0,0.23871999979019165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,0,1,fp8,fp8,0,0.22637333472569784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,float16,0,0.11602133512496948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,float16,0,0.15037332971890768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,fp8,0,0.11937066912651062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,128,1,fp8,fp8,0,0.11761599779129028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,fp8,0,0.153546671072642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,fp8,0,0.439242680867513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,float16,0,0.10947733124097188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,float16,0,0.13854933778444925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,fp8,0,0.10553066929181416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,128,1,fp8,fp8,0,0.09779733419418335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,fp8,0,0.13806933164596558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,0,1,fp8,fp8,0,0.12802132964134216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,float16,0,0.10459733009338379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,float16,0,0.13822933038075766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,fp8,0,0.1048959990342458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,128,1,fp8,fp8,0,0.09914666414260864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,fp8,0,0.13825066884358725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,0,1,fp8,fp8,0,0.12849066654841104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,float16,0,0.10523733496665955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,float16,0,0.1381066640218099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,fp8,0,0.10545066992441814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,128,1,fp8,fp8,0,0.09753066301345825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,fp8,0,0.13825066884358725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,0,1,fp8,fp8,0,0.1279306709766388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,float16,0,0.10505066315333049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,float16,0,0.13828800121943155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,fp8,0,0.10530666510264079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,128,1,fp8,fp8,0,0.09922132889429729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,fp8,0,0.13878400127092996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,0,1,fp8,fp8,0,0.12983466188112894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,0,1,fp8,fp8,0,0.14845866958300272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,float16,0,0.08690667152404785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,float16,0,0.1074720025062561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,fp8,0,0.086517333984375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,128,1,fp8,fp8,0,0.082805335521698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,fp8,0,0.10725333293279012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,0,1,fp8,fp8,0,0.10140800476074219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,float16,0,0.08667733271916707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,float16,0,0.10708799958229065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,fp8,0,0.08690667152404785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,128,1,fp8,fp8,0,0.08288000027338664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,fp8,0,0.10706133643786113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,0,1,fp8,fp8,0,0.10121599833170573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,float16,0,0.08529599507649739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,float16,0,0.10732266306877136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,fp8,0,0.08604266246159871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,128,1,fp8,fp8,0,0.08268266419569652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,fp8,0,0.10732799768447876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,float16,0,0.08476266264915466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,float16,0,0.10762133200963338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,fp8,0,0.08482133348782857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,128,1,fp8,fp8,0,0.08284799754619598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,float16,0,0.2379253307978312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,0,1,fp8,fp8,0,0.10292266805966695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,float16,0,0.08666132887204488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,float16,0,0.10804266730944316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,fp8,0,0.08513066172599792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,128,1,fp8,fp8,0,0.08267733454704285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,fp8,0,0.10733866691589355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,0,1,fp8,fp8,0,0.10121066371599834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,0,1,fp8,fp8,0,0.10389866431554158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,fp8,0,0.10697066783905029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,float16,0,2.8630345662434897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,float16,0,3.410304069519043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,fp8,0,2.862090746561686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,128,1,fp8,fp8,0,2.7635412216186523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,fp8,0,3.3952852884928384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,0,1,fp8,fp8,0,3.254762649536133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,float16,0,2.8896427154541016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,float16,0,3.4296318689982095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,fp8,0,2.8829332987467446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,128,1,fp8,fp8,0,2.8625758488972983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,fp8,0,3.4183359146118164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,0,1,fp8,fp8,0,3.334869384765625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,float16,0,3.017226537068685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,float16,0,3.539792060852051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,fp8,0,2.9409119288126626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,128,1,fp8,fp8,0,2.8584213256835938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,fp8,0,3.5355520248413086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,0,1,fp8,fp8,0,3.3834079106648765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,float16,0,3.046741485595703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,float16,0,3.5933494567871094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,fp8,0,3.0612319310506186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,128,1,fp8,fp8,0,3.041914621988932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,fp8,0,3.5791893005371094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,float16,0,1.6426186561584473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,0,1,fp8,fp8,0,3.5230026245117188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,float16,0,1.9307786623636882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,fp8,0,1.583679993947347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,128,1,fp8,fp8,0,1.5793439547220867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,fp8,0,1.875109354654948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,0,1,fp8,fp8,0,1.8317386309305828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,float16,0,1.7171039581298828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,fp8,0,1.4423519770304363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,fp8,0,1.7114346822102864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,float16,0,1.4470346768697102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,float16,0,1.4543466567993164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,128,1,fp8,fp8,0,1.3699839909871419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,float16,0,1.727728048960368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,fp8,0,1.4479093551635742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,128,1,fp8,fp8,0,1.3906292915344238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,fp8,0,1.7230614026387532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,0,1,fp8,fp8,0,1.6463680267333984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,float16,0,1.4558186531066895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,float16,0,1.7321386337280273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,fp8,0,1.4539306958516438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,0,1,fp8,fp8,0,1.6137653986612956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,fp8,0,1.7271359761555989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,0,1,fp8,fp8,0,1.6716480255126953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,float16,0,1.4700907071431477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,float16,0,1.7553067207336426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,fp8,0,1.493893305460612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,128,1,fp8,fp8,0,1.510789394378662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,float16,0,0.7961653073628744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,fp8,0,1.7420746485392253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,0,1,fp8,fp8,0,1.7663040161132812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,float16,0,0.9379680156707764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,fp8,0,0.7795893351236979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,128,1,fp8,fp8,0,0.7945333321889242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,128,1,fp8,fp8,0,1.4246026674906414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,fp8,0,0.9250240325927734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,0,1,fp8,fp8,0,0.9205546379089355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,float16,0,0.8673600355784098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,fp8,0,0.729861338933309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,128,1,fp8,fp8,0,0.679423967997233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,fp8,0,0.8683359622955322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,0,1,fp8,fp8,0,0.8051199913024902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,float16,0,0.7377119859059652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,float16,0,0.7314613660176595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,float16,0,0.8750080267588297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,fp8,0,0.736016035079956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,128,1,fp8,fp8,0,0.7008586724599203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,fp8,0,0.8734773000081381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,float16,0,0.7355093161265055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,float16,0,0.8769706885019938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,fp8,0,0.7368799845377604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,128,1,fp8,fp8,0,0.6921866734822592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,fp8,0,0.875098705291748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,0,1,fp8,fp8,0,0.8179519971211752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,float16,0,0.7445973555246989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,0,1,fp8,fp8,0,0.8191306591033936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,float16,0,0.8860533237457275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,fp8,0,0.7441066900889078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,128,1,fp8,fp8,0,0.7492799758911133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,fp8,0,0.8803626696268717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,float16,0,0.40572798252105713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,0,1,fp8,fp8,0,0.8730719884236654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,fp8,0,0.39556264877319336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,128,1,fp8,fp8,0,0.4069013198216756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,fp8,0,0.47228264808654785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,0,1,fp8,fp8,0,0.4723999897638957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,float16,0,0.3779413302739461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,float16,0,0.4466400146484375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,fp8,0,0.3737013339996338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,128,1,fp8,fp8,0,0.34991466999053955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,fp8,0,0.44677865505218506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,0,1,fp8,fp8,0,0.41233599185943604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,float16,0,0.37787731488545734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,float16,0,0.44602668285369873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,fp8,0,0.37483731905619305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,128,1,fp8,fp8,0,0.3567093213399251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,float16,0,0.4803733428319295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,fp8,0,0.4460853338241577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,0,1,fp8,fp8,0,0.4200799862543742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,float16,0,0.38039998213450116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,float16,0,0.4484746853510539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,fp8,0,0.37565867106119794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,128,1,fp8,fp8,0,0.35602664947509766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,fp8,0,0.44946134090423584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,0,1,fp8,fp8,0,0.4179040193557739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,float16,0,0.4538400173187256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,fp8,0,0.3819733460744222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,128,1,fp8,fp8,0,0.36604265371958417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,fp8,0,0.4521600008010864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,0,1,fp8,fp8,0,0.4299786488215129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,float16,0,0.2544320027033488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,fp8,0,0.21183999379475912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,128,1,fp8,fp8,0,0.21291200319925943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,fp8,0,0.2511146664619446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,0,1,fp8,fp8,0,0.2569920023282369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,float16,0,0.1978666583697001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,float16,0,0.23432532946268717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,float16,0,0.38230931758880615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,128,1,fp8,fp8,0,0.18531733751296997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,fp8,0,0.23544534047444662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,0,1,fp8,fp8,0,0.21897067626317343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,float16,0,0.19724800189336142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,float16,0,0.2344906727472941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,fp8,0,0.19667200247446695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,128,1,fp8,fp8,0,0.18725866079330444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,fp8,0,0.23427200317382812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,0,1,fp8,fp8,0,0.22150933742523193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,float16,0,0.19614400466283163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,float16,0,0.23634666204452515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,fp8,0,0.19590399662653604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,fp8,0,0.19908267259597778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,fp8,0,0.23598933219909668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,0,1,fp8,fp8,0,0.23265600204467773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,float16,0,0.20469866196314493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,float16,0,0.2384213407834371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,float16,0,0.2143626610438029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,128,1,fp8,fp8,0,0.19150400161743164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,fp8,0,0.23854400714238486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,0,1,fp8,fp8,0,0.2229386568069458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,float16,0,0.1165013313293457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,float16,0,0.13985066612561545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,fp8,0,0.11528533697128296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,128,1,fp8,fp8,0,0.11872000495592754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,fp8,0,0.14245333274205527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,0,1,fp8,fp8,0,0.13766400019327799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,float16,0,0.10452799995740254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,float16,0,0.12719466288884482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,fp8,0,0.10342933734258015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,128,1,fp8,fp8,0,0.09879466891288757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,fp8,0,0.1262453297773997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,0,1,fp8,fp8,0,0.11776000261306763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,fp8,0,0.19954667488733926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,float16,0,0.12588799993197122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,fp8,0,0.10553066929181416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,128,1,fp8,fp8,0,0.0995840032895406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,fp8,0,0.12593600153923035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,0,1,fp8,fp8,0,0.11901866396268208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,float16,0,0.10422399640083313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,float16,0,0.126202662785848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,128,1,fp8,fp8,0,0.1879253387451172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,128,1,fp8,fp8,0,0.09983467062314351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,fp8,0,0.12702932953834534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,0,1,fp8,fp8,0,0.11902933319409688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,float16,0,0.10732266306877136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,float16,0,0.12904000282287598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,fp8,0,0.10587199529012044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,128,1,fp8,fp8,0,0.10153599580128987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,fp8,0,0.10487467050552368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,fp8,0,0.12846400340398154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,0,1,fp8,fp8,0,0.12131733695665996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,float16,0,0.07893333335717519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,fp8,0,0.06540800134340923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,128,1,fp8,fp8,0,0.06875733534495036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,fp8,0,0.07914133369922638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,0,1,fp8,fp8,0,0.08088000118732452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,float16,0,0.07663999994595845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,fp8,0,0.06482133269309998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,128,1,fp8,fp8,0,0.06128533184528351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,fp8,0,0.07755733529726665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,0,1,fp8,fp8,0,0.07300800085067749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,float16,0,0.06428266565004985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,float16,0,0.07707199951012929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,float16,0,0.06402133405208588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,128,1,fp8,fp8,0,0.061066667238871254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,fp8,0,0.07654400169849396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,0,1,fp8,fp8,0,0.07102933526039124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,float16,0,0.06650133430957794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,float16,0,0.062319998939832054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,float16,0,0.07666666805744171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,fp8,0,0.06402666866779327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,128,1,fp8,fp8,0,0.06182933350404104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,fp8,0,0.07644266883532207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,0,1,fp8,fp8,0,0.07205866773923238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,float16,0,0.06250133117039998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,float16,0,0.07504533231258392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,fp8,0,0.06226666768391927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,128,1,fp8,fp8,0,0.06202666461467743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,fp8,0,0.07537066439787547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,0,1,fp8,fp8,0,0.07308800021807353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,float16,0,0.039546666045983635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,float16,0,0.05115733544031779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,fp8,0,0.041893333196640015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,128,1,fp8,fp8,0,0.04146666576464971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,fp8,0,0.05034666756788889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,0,1,fp8,fp8,0,0.048058668772379555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,float16,0,0.03939199944337209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,float16,0,0.04836266736189524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,float16,0,0.10347200433413188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,fp8,0,0.04986133178075155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,0,1,fp8,fp8,0,0.04796266555786133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,float16,0,0.03958400090535482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,float16,0,0.04987733562787374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,fp8,0,0.0394400010506312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,128,1,fp8,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,fp8,0,0.04969066878159841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,0,1,fp8,fp8,0,0.046154667933781944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,float16,0,0.0395413339138031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,float16,0,0.050106664498647056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,fp8,0,0.03940266619126002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,128,1,fp8,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,fp8,0,0.04836800197760264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,0,1,fp8,fp8,0,0.047797332207361855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,float16,0,0.039850667119026184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,float16,0,0.05016533533732096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,128,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,fp8,0,0.04994133114814758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,0,1,fp8,fp8,0,0.04701333244641622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,fp8,0,0.06451733410358429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,128,1,float16,float16,0,2.776048024495443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,0,1,float16,float16,0,2.8200159072875977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,128,1,float16,fp8,0,2.7725280125935874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,128,1,fp8,fp8,0,2.696832021077474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,128,1,fp8,fp8,0,0.037733333806196846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,0,1,fp8,fp8,0,2.7065865198771157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,128,1,float16,float16,0,2.80403200785319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,0,1,float16,float16,0,2.832554817199707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,0,1,float16,fp8,0,2.8148266474405923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,128,1,float16,fp8,0,2.795146624247233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,128,1,fp8,fp8,0,2.7610400517781577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,0,1,float16,fp8,0,2.837711970011393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,128,1,float16,float16,0,2.889317194620768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,0,1,float16,float16,0,2.9667040506998696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,128,1,float16,fp8,0,2.7999092737833657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,0,1,fp8,fp8,0,2.7915093104044595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,128,1,fp8,fp8,0,2.8211520512898765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,0,1,float16,fp8,0,2.9506400426228843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,128,1,float16,float16,0,2.959482510884603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,0,1,float16,float16,0,2.999669392903646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,128,1,float16,fp8,0,2.973445256551107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,128,1,fp8,fp8,0,2.962970733642578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,0,1,float16,fp8,0,2.9737653732299805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,128,1,float16,float16,0,1.585349400838216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,0,1,float16,float16,0,1.6143147150675456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,0,1,fp8,fp8,0,2.992938677469889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,128,1,float16,fp8,0,1.545904000600179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,128,1,fp8,fp8,0,1.5284639994303386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,0,1,float16,fp8,0,1.577151934305827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,0,1,fp8,fp8,0,2.8426987330118814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,0,1,fp8,fp8,0,1.5493866602579753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,128,1,float16,float16,0,1.4000585873921711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,0,1,float16,float16,0,1.4176853497823079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,128,1,float16,fp8,0,1.3947680791219075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,0,1,float16,fp8,0,1.4140639305114746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,128,1,fp8,fp8,0,1.3432000478108723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,0,1,float16,float16,0,1.4292373657226562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,0,1,fp8,fp8,0,1.339695930480957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,128,1,fp8,fp8,0,1.3525172869364421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,128,1,float16,float16,0,1.4077332814534504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,0,1,fp8,fp8,0,1.3714292844136555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,128,1,float16,fp8,0,1.4096479415893555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,0,1,float16,float16,0,1.4312426249186199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,0,1,float16,fp8,0,1.427077293395996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,128,1,fp8,fp8,0,1.3734134038289387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,128,1,float16,float16,0,1.4087840716044109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,0,1,fp8,fp8,0,1.3813014030456543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,128,1,float16,fp8,0,1.4086240132649739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,0,1,float16,float16,0,1.4497493108113606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,128,1,float16,fp8,0,1.4390719731648762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,128,1,float16,float16,0,1.42740265528361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,0,1,float16,fp8,0,1.4594666163126628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,128,1,float16,float16,0,0.7738666534423828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,0,1,float16,fp8,0,1.4299519856770833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,0,1,fp8,fp8,0,1.4887359937032063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,0,1,float16,float16,0,0.8209493160247803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,128,1,float16,fp8,0,0.7555680274963379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,128,1,fp8,fp8,0,0.7662506898244222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,128,1,fp8,fp8,0,1.4724480311075847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,0,1,float16,fp8,0,0.7673280239105225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,0,1,fp8,fp8,0,0.7761173248291016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,128,1,float16,float16,0,0.7091146310170492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,0,1,float16,float16,0,0.7210186322530111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,128,1,float16,fp8,0,0.7080427010854086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,128,1,fp8,fp8,0,0.662826657295227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,0,1,float16,fp8,0,0.7167786757151285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,0,1,fp8,fp8,0,0.7023359934488932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,128,1,float16,float16,0,0.7395040194193522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,0,1,float16,float16,0,0.7268746693929037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,128,1,float16,fp8,0,0.7137493292490641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,128,1,fp8,fp8,0,0.6726933320363363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,0,1,float16,fp8,0,0.7208373546600342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,0,1,fp8,fp8,0,0.6823999881744385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,128,1,float16,float16,0,0.7183840274810791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,0,1,float16,float16,0,0.7266506354014078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,128,1,float16,fp8,0,0.7145546277364095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,0,1,float16,fp8,0,0.7226293087005615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,0,1,fp8,fp8,0,0.6810133457183838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,128,1,float16,float16,0,0.7228319644927979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,0,1,float16,float16,0,0.7333172957102457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,128,1,float16,fp8,0,0.7201333045959473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,128,1,fp8,fp8,0,0.7292533715566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,0,1,float16,fp8,0,0.7599360148111979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,128,1,float16,float16,0,0.39373334248860675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,0,1,float16,float16,0,0.39951467514038086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,128,1,float16,fp8,0,0.38870398203531903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,128,1,fp8,fp8,0,0.39078934987386066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,0,1,float16,fp8,0,0.40725334485371906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,0,1,fp8,fp8,0,0.3973280191421509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,128,1,float16,float16,0,0.3631360133488973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,0,1,float16,float16,0,0.37054399649302167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,128,1,fp8,fp8,0,0.339626669883728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,0,1,float16,fp8,0,0.369269331296285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,0,1,fp8,fp8,0,0.3431146542231242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,128,1,float16,float16,0,0.3655093510945638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,0,1,float16,float16,0,0.3709760109583537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,128,1,float16,fp8,0,0.3627839883168538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,128,1,float16,fp8,0,0.3638559977213542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,128,1,fp8,fp8,0,0.34497066338857013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,0,1,float16,fp8,0,0.3691680034001668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,0,1,fp8,fp8,0,0.3508533239364624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,128,1,float16,float16,0,0.3654826482137044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,0,1,float16,float16,0,0.372106671333313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,128,1,float16,fp8,0,0.3652000029881795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,128,1,fp8,fp8,0,0.3468746741612752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,0,1,float16,fp8,0,0.3710506757100423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,0,1,fp8,fp8,0,0.3501226504643758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,128,1,float16,float16,0,0.3701333204905192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,0,1,float16,float16,0,0.37648534774780273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,128,1,float16,fp8,0,0.36976532141367596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,128,1,fp8,fp8,0,0.355621337890625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,0,1,float16,fp8,0,0.3754719893137614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,0,1,fp8,fp8,0,0.35841067632039386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,128,1,float16,float16,0,0.20884267489115396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,0,1,float16,float16,0,0.2134079933166504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,128,1,float16,fp8,0,0.20545599857966104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,128,1,fp8,fp8,0,0.2067626714706421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,0,1,float16,fp8,0,0.2076853315035502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,0,1,fp8,fp8,0,0.21030932664871216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,128,1,float16,float16,0,0.19182932376861572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,0,1,float16,float16,0,0.19323732455571493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,128,1,float16,fp8,0,0.19210666418075562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,128,1,fp8,fp8,0,0.1793760061264038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,0,1,float16,fp8,0,0.1946400006612142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,0,1,fp8,fp8,0,0.18243199586868286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,128,1,float16,float16,0,0.19182932376861572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,0,1,float16,float16,0,0.19419199228286743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,128,1,float16,fp8,0,0.1904159982999166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,128,1,fp8,fp8,0,0.18303465843200684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,0,1,float16,fp8,0,0.19425066312154135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,0,1,fp8,fp8,0,0.7332053184509277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,128,1,float16,float16,0,0.1929759979248047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,0,1,float16,float16,0,0.19589867194493613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,128,1,float16,fp8,0,0.1925813357035319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,128,1,fp8,fp8,0,0.18225600322087607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,0,1,float16,fp8,0,0.19561066230138144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,0,1,fp8,fp8,0,0.18331199884414673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,0,1,fp8,fp8,0,0.18397865692774454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,128,1,float16,float16,0,0.19491199652353922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,0,1,float16,float16,0,0.19614400466283163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,128,1,float16,fp8,0,0.19406400124231973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,128,1,fp8,fp8,0,0.18404799699783325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,0,1,float16,fp8,0,0.19606399536132812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,0,1,fp8,fp8,0,0.18621333440144858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,0,1,float16,float16,0,0.11469333370526631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,128,1,float16,fp8,0,0.11133866508801778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,0,1,float16,fp8,0,0.11331733067830403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,0,1,fp8,fp8,0,0.11542933185895284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,128,1,float16,float16,0,0.10322667161623637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,128,1,fp8,fp8,0,0.6717173258463541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,128,1,float16,fp8,0,0.1023466686407725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,128,1,fp8,fp8,0,0.09727467099825542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,128,1,float16,float16,0,0.11268267035484314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,0,1,float16,fp8,0,0.10593600074450175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,0,1,fp8,fp8,0,0.09826133648554485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,128,1,float16,float16,0,0.10203733046849568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,0,1,float16,float16,0,0.10435733199119568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,128,1,float16,fp8,0,0.10233599940935771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,128,1,fp8,fp8,0,0.09680533409118652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,0,1,float16,fp8,0,0.10547733306884766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,0,1,fp8,fp8,0,0.09763200084368388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,128,1,float16,float16,0,0.10346666971842448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,0,1,float16,float16,0,0.10455999771753947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,128,1,float16,fp8,0,0.103301336367925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,128,1,fp8,fp8,0,0.09736532966295879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,0,1,float16,fp8,0,0.10532266894976298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,0,1,fp8,fp8,0,0.09921600421269734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,128,1,float16,float16,0,0.10388799508412679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,0,1,float16,float16,0,0.10729599992434184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,128,1,float16,fp8,0,0.10372799634933472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,128,1,fp8,fp8,0,0.0995360016822815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,0,1,float16,fp8,0,0.10542933146158855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,0,1,fp8,fp8,0,0.10055999954541524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,128,1,float16,float16,0,0.0650133341550827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,0,1,float16,float16,0,0.06631466746330261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,128,1,float16,fp8,0,0.06469866633415222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,128,1,fp8,fp8,0,0.06777599950631459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,0,1,float16,fp8,0,0.06629333396752675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,0,1,fp8,fp8,0,0.06832000116507213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,128,1,float16,float16,0,0.062261333068211876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,0,1,float16,float16,0,0.0621973325808843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,128,1,float16,fp8,0,0.06253866851329803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,0,1,float16,fp8,0,0.0625439981619517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,0,1,fp8,fp8,0,0.05931733548641205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,128,1,float16,float16,0,0.0625439981619517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,0,1,float16,float16,0,0.06292800108591716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,128,1,float16,fp8,0,0.06222933530807495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,128,1,fp8,fp8,0,0.058592001597086586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,0,1,float16,fp8,0,0.0625439981619517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,0,1,fp8,fp8,0,0.05961066484451294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,128,1,float16,float16,0,0.06264000137646993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,0,1,float16,float16,0,0.06329066554705302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,128,1,float16,fp8,0,0.06250666578610738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,128,1,fp8,fp8,0,0.06061866879463196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,128,1,fp8,fp8,0,0.11565867066383362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,0,1,fp8,fp8,0,0.06035199761390686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,128,1,float16,float16,0,0.062234664956728615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,0,1,float16,float16,0,0.0621973325808843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,0,1,float16,float16,0,0.10439466436704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,128,1,float16,fp8,0,0.062394668658574425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,0,1,float16,fp8,0,0.062463998794555664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,0,1,fp8,fp8,0,0.06123200058937073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,128,1,float16,float16,0,0.04072533299525579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,0,1,float16,fp8,0,0.0631039987007777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,128,1,float16,fp8,0,0.04068266600370407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,128,1,fp8,fp8,0,0.03956799954175949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,0,1,float16,fp8,0,0.04182933270931244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,0,1,fp8,fp8,0,0.0421066681543986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,128,1,fp8,fp8,0,0.058362667759259544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,0,1,float16,float16,0,0.03998400022586187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,128,1,float16,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,128,1,fp8,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,0,1,float16,fp8,0,0.04161600023508072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,0,1,float16,float16,0,0.04206933577855428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,0,1,fp8,fp8,0,0.03942933430274328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,128,1,float16,float16,0,0.03902400036652883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,0,1,float16,float16,0,0.04141866664091746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,128,1,float16,fp8,0,0.03993066648642222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,128,1,fp8,fp8,0,0.038906666139761605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,0,1,float16,fp8,0,0.04160533348719279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,0,1,fp8,fp8,0,0.038362666964530945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,128,1,float16,float16,0,0.039781334499518074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,0,1,float16,float16,0,0.03978666663169861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,128,1,float16,fp8,0,0.039733332892258964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,128,1,fp8,fp8,0,0.03890133400758108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,0,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,0,1,fp8,fp8,0,0.03980266551176707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,128,1,float16,float16,0,0.039450667798519135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,0,1,float16,float16,0,0.039834665755430855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,128,1,float16,fp8,0,0.04152533411979675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,128,1,fp8,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,0,1,float16,fp8,0,0.04167466859022776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,0,1,fp8,fp8,0,0.039813332259655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,128,1,float16,float16,0,0.027301333844661713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,128,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,128,1,fp8,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,0,1,float16,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,0,1,float16,float16,0,0.026533332963784535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,128,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,128,1,float16,float16,0,0.03982933362325033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,0,1,float16,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,0,1,fp8,fp8,0,0.02586666742960612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,128,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,128,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,128,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,0,1,float16,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,0,1,fp8,fp8,0,0.02589333305756251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,128,1,float16,float16,0,0.025557334224383037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,0,1,float16,float16,0,0.02722666660944621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,128,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,128,1,float16,fp8,0,0.025733334322770435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,128,1,fp8,fp8,0,0.026895999908447266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,0,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,128,1,float16,float16,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,0,1,float16,float16,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,128,1,float16,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,128,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,128,1,fp8,fp8,0,0.057946667075157166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,128,1,float16,float16,0,1.2997600237528484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,0,1,float16,float16,0,0.026928000152111053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,0,1,float16,fp8,0,0.027888000011444092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,128,1,float16,fp8,0,1.2983040014902751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,128,1,fp8,fp8,0,1.2211840152740479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,0,1,float16,fp8,0,1.2778240044911702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,0,1,fp8,fp8,0,1.204799969991048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,128,1,float16,float16,0,1.2997120221455891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,0,1,float16,float16,0,1.2806346416473389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,128,1,float16,fp8,0,1.294432004292806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,128,1,fp8,fp8,0,1.2456586360931396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,0,1,float16,fp8,0,1.273535966873169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,0,1,fp8,fp8,0,1.2274239857991536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,128,1,float16,float16,0,1.3067413171132405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,0,1,float16,float16,0,1.2787573337554932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,128,1,float16,fp8,0,1.2992693583170574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,128,1,fp8,fp8,0,1.2560213406880696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,0,1,float16,fp8,0,1.2769707043965657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,0,1,fp8,fp8,0,1.2367093563079834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,0,1,float16,float16,0,1.2791039943695068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,0,1,float16,float16,0,1.301802635192871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,128,1,float16,fp8,0,1.3409546216328938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,128,1,fp8,fp8,0,1.3518932660420735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,0,1,float16,fp8,0,1.2873280048370361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,0,1,fp8,fp8,0,1.332581361134847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,128,1,float16,float16,0,0.7205920219421387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,0,1,float16,float16,0,0.7083679835001627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,128,1,float16,float16,0,1.3223466873168945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,128,1,fp8,fp8,0,0.7146720091501871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,0,1,float16,fp8,0,0.6929813226064047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,0,1,fp8,fp8,0,0.7003786563873291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,128,1,float16,float16,0,0.659770647684733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,0,1,float16,float16,0,0.6524373292922974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,128,1,float16,fp8,0,0.6584479808807373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,128,1,fp8,fp8,0,0.6184000174204508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,0,1,float16,fp8,0,0.6463733514149984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,0,1,fp8,fp8,0,0.6174826622009277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,128,1,float16,fp8,0,0.7077279885609945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,128,1,float16,float16,0,0.658735990524292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,0,1,float16,float16,0,0.6467093229293823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,128,1,float16,fp8,0,0.6550399859746298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,128,1,fp8,fp8,0,0.6246506770451864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,0,1,float16,fp8,0,0.6457706689834595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,0,1,fp8,fp8,0,0.6103466749191284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,128,1,float16,float16,0,0.6612213452657064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,0,1,float16,float16,0,0.6545759836832682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,128,1,float16,fp8,0,0.659877339998881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,0,1,float16,fp8,0,0.6487199862798055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,0,1,fp8,fp8,0,0.610586682955424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,128,1,float16,float16,0,0.6693546772003174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,0,1,float16,float16,0,0.6565066576004028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,128,1,float16,fp8,0,0.6630400021870931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,128,1,fp8,fp8,0,0.6732532978057861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,0,1,float16,fp8,0,0.6529066562652588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,0,1,fp8,fp8,0,0.6590346495310465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,0,1,float16,float16,0,0.3591253360112508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,128,1,float16,fp8,0,0.3617386817932129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,128,1,fp8,fp8,0,0.37939735253651935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,0,1,float16,fp8,0,0.354751984278361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,0,1,fp8,fp8,0,0.36211200555165607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,128,1,float16,float16,0,0.3357386589050293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,0,1,float16,float16,0,0.33166933059692383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,128,1,float16,fp8,0,0.33560001850128174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,128,1,fp8,fp8,0,0.3123466571172078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,0,1,float16,fp8,0,0.3290560046831767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,0,1,fp8,fp8,0,0.30537599325180054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,128,1,fp8,fp8,0,0.6248426834742228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,128,1,float16,float16,0,0.33566399415334064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,0,1,float16,float16,0,0.33036800225575763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,128,1,float16,fp8,0,0.3341279824574788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,128,1,fp8,fp8,0,0.3184266686439514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,0,1,float16,fp8,0,0.3282559911410014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,0,1,fp8,fp8,0,0.3125866651535034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,128,1,float16,float16,0,0.3375946680704753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,0,1,float16,float16,0,0.33268266916275024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,128,1,float16,fp8,0,0.33722134431203205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,128,1,fp8,fp8,0,0.3173759977022807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,0,1,float16,fp8,0,0.3316906690597534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,128,1,float16,float16,0,0.3447519938151042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,0,1,float16,float16,0,0.33694934844970703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,128,1,float16,fp8,0,0.33984001477559406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,128,1,float16,float16,0,0.3672373294830322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,128,1,fp8,fp8,0,0.3279520074526469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,0,1,fp8,fp8,0,0.3200213313102722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,128,1,float16,float16,0,0.19550933440526327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,0,1,float16,float16,0,0.19088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,128,1,float16,fp8,0,0.1912213365236918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,128,1,fp8,fp8,0,0.19473065932591757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,0,1,float16,fp8,0,0.18844799200693765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,128,1,float16,float16,0,0.17830399672190347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,0,1,float16,float16,0,0.17568532625834146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,128,1,float16,fp8,0,0.1788426637649536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,0,1,float16,fp8,0,0.3367413282394409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,128,1,fp8,fp8,0,0.1664906640847524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,0,1,float16,fp8,0,0.1740000049273173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,0,1,fp8,fp8,0,0.1622666617234548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,128,1,float16,float16,0,0.17730132738749185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,0,1,float16,float16,0,0.17426133155822754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,128,1,float16,fp8,0,0.17733333508173624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,0,1,fp8,fp8,0,0.19347200791041055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,0,1,float16,fp8,0,0.17280532916386923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,0,1,fp8,fp8,0,0.16453867157300314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,128,1,float16,float16,0,0.17708265781402588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,0,1,float16,float16,0,0.1752906640370687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,128,1,float16,fp8,0,0.17787732680638632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,128,1,fp8,fp8,0,0.16771199305852255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,0,1,float16,fp8,0,0.17484267552693686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,0,1,fp8,fp8,0,0.1658560037612915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,128,1,float16,float16,0,0.1793066660563151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,0,1,float16,float16,0,0.17875200510025024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,0,1,fp8,fp8,0,0.3091520071029663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,128,1,float16,fp8,0,0.17905600865681967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,128,1,fp8,fp8,0,0.17197332779566446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,0,1,float16,fp8,0,0.17638933658599854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,0,1,fp8,fp8,0,0.16796799500783285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,0,1,float16,float16,0,0.10296533505121867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,128,1,float16,fp8,0,0.10197866956392924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,128,1,fp8,fp8,0,0.10717333356539409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,0,1,float16,fp8,0,0.10150933265686035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,0,1,fp8,fp8,0,0.105621337890625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,128,1,float16,float16,0,0.09494933485984802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,0,1,float16,float16,0,0.0930560032526652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,128,1,float16,fp8,0,0.09530133008956909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,128,1,fp8,fp8,0,0.08914132912953694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,0,1,float16,fp8,0,0.0930613378683726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,0,1,fp8,fp8,0,0.08691733082135518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,128,1,float16,float16,0,0.09333866834640503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,0,1,float16,float16,0,0.09268266956011455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,128,1,float16,fp8,0,0.09388800462086995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,128,1,fp8,fp8,0,0.0890880028406779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,0,1,float16,fp8,0,0.09269866347312927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,128,1,float16,float16,0,0.10469866792360942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,128,1,float16,float16,0,0.093941330909729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,0,1,float16,float16,0,0.09344533085823059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,128,1,float16,fp8,0,0.09528533617655437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,128,1,fp8,fp8,0,0.08930133779843648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,0,1,float16,fp8,0,0.09301867087682088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,0,1,fp8,fp8,0,0.08764266967773438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,128,1,float16,float16,0,0.09527466694513957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,0,1,float16,float16,0,0.09433066844940186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,128,1,float16,fp8,0,0.09502933422724406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,128,1,fp8,fp8,0,0.09224533041318257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,0,1,float16,fp8,0,0.09347732861836751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,0,1,fp8,fp8,0,0.09089600046475728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,128,1,float16,float16,0,0.06181866427262624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,0,1,float16,float16,0,0.061103999614715576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,128,1,float16,fp8,0,0.06122133135795593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,128,1,fp8,fp8,0,0.06425066788991292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,0,1,float16,fp8,0,0.05982399980227152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,0,1,fp8,fp8,0,0.08684800068537395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,128,1,fp8,fp8,0,0.1692319909731547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,0,1,float16,float16,0,0.05789866546789805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,128,1,float16,fp8,0,0.05884799857934316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,128,1,fp8,fp8,0,0.0561706672112147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,0,1,float16,fp8,0,0.05629333357016245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,0,1,fp8,fp8,0,0.054133335749308266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,128,1,float16,float16,0,0.058335999647776283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,0,1,float16,float16,0,0.05588266750176748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,128,1,float16,fp8,0,0.05863999823729197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,128,1,fp8,fp8,0,0.05601066847642263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,0,1,float16,fp8,0,0.0562666654586792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,0,1,fp8,fp8,0,0.05455466608206431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,128,1,float16,float16,0,0.05864533285299937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,0,1,float16,float16,0,0.05759466687838236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,128,1,float16,fp8,0,0.05829866727193197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,128,1,fp8,fp8,0,0.05638933181762695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,0,1,fp8,fp8,0,0.06205333272616068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,128,1,float16,float16,0,0.05789333085219065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,0,1,fp8,fp8,0,0.05418666700522105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,128,1,float16,float16,0,0.057333335280418396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,0,1,float16,float16,0,0.057855998476346336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,128,1,float16,fp8,0,0.05739733576774597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,128,1,fp8,fp8,0,0.05694933235645294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,0,1,float16,fp8,0,0.05593066910902659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,0,1,fp8,fp8,0,0.0544106662273407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,128,1,float16,float16,0,0.03751466671625773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,0,1,float16,float16,0,0.03764266769091288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,128,1,float16,fp8,0,0.03781333317359289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,128,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,0,1,float16,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,0,1,fp8,fp8,0,0.03618666778008143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,128,1,float16,float16,0,0.03636800001064936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,0,1,float16,float16,0,0.03669866671164831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,128,1,float16,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,128,1,fp8,fp8,0,0.03585600107908249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,0,1,float16,fp8,0,0.05734399954477946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,0,1,fp8,fp8,0,0.03399466723203659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,128,1,float16,float16,0,0.03595733394225439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,0,1,float16,float16,0,0.03569599986076355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,128,1,float16,fp8,0,0.03616533428430557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,128,1,fp8,fp8,0,0.035743998984495796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,0,1,float16,fp8,0,0.03587199995915095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,0,1,fp8,fp8,0,0.03580799947182337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,128,1,float16,float16,0,0.03578133384386698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,0,1,float16,float16,0,0.03714666763941447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,128,1,float16,fp8,0,0.03779733429352442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,128,1,fp8,fp8,0,0.036277333895365395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,0,1,float16,fp8,0,0.036271999279658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,0,1,fp8,fp8,0,0.03505599995454153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,128,1,float16,float16,0,0.03586133321126302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,0,1,float16,float16,0,0.03576533248027166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,128,1,float16,fp8,0,0.03580799947182337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,128,1,fp8,fp8,0,0.03694933404525121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,0,1,fp8,fp8,0,0.035760000348091125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,128,1,float16,float16,0,0.025663999219735462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,0,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,128,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,128,1,fp8,fp8,0,0.025706666211287182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,0,1,float16,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,0,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,128,1,float16,float16,0,0.0235359991590182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,0,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,128,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,128,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,0,1,float16,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,128,1,float16,float16,0,0.025472000241279602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,0,1,float16,float16,0,0.023728000621000927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,128,1,float16,fp8,0,0.02495466669400533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,128,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,0,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,0,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,128,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,0,1,float16,float16,0,0.025290665527184803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,0,1,float16,fp8,0,0.03537066777547201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,128,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,0,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,128,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,0,1,float16,float16,0,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,128,1,float16,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,128,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,0,1,fp8,fp8,0,0.02498133232196172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,0,1,float16,float16,0,0.02094399929046631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,128,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,128,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,0,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,128,1,fp8,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,0,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,0,1,fp8,fp8,0,0.020773333807786305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,128,1,float16,float16,0,0.02053333322207133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,0,1,float16,float16,0,0.020874666670958202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,128,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,128,1,fp8,fp8,0,0.02083733429511388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,0,1,fp8,fp8,0,0.01964266722400983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,0,1,float16,float16,0,0.019445333629846573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,128,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,128,1,fp8,fp8,0,0.01974933346112569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,0,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,128,1,float16,float16,0,0.020981334149837494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,128,1,float16,fp8,0,0.020773333807786305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,128,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,128,1,float16,fp8,0,0.02382933348417282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,128,1,float16,float16,0,0.6981973648071289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,0,1,float16,float16,0,0.6980853080749512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,128,1,float16,fp8,0,0.6942773660024008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,128,1,fp8,fp8,0,0.6435093482335409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,0,1,float16,fp8,0,0.03703466554482778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,0,1,fp8,fp8,0,0.6446933348973592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,128,1,float16,float16,0,0.6987573305765787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,128,1,float16,fp8,0,0.6972106297810873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,0,1,float16,fp8,0,0.6945119698842367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,128,1,fp8,fp8,0,0.6534666617711385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,0,1,float16,fp8,0,0.6952959696451823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,0,1,float16,float16,0,0.6993439992268881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,0,1,fp8,fp8,0,0.6517920096715292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,128,1,float16,float16,0,0.6992479960123698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,0,1,float16,float16,0,0.7037920157114664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,128,1,float16,fp8,0,0.6987093289693197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,128,1,fp8,fp8,0,0.6513386567433676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,0,1,float16,fp8,0,0.6980053583780924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,0,1,fp8,fp8,0,0.6504853169123331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,128,1,float16,float16,0,0.7075466314951578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,0,1,float16,float16,0,0.70797332127889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,128,1,fp8,fp8,0,0.6974666913350424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,0,1,float16,fp8,0,0.7016746997833252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,0,1,fp8,fp8,0,0.6953759988149008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,0,1,float16,float16,0,0.38864533106486004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,128,1,float16,fp8,0,0.3808906475702922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,128,1,fp8,fp8,0,0.38124799728393555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,0,1,float16,fp8,0,0.37933866182963055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,0,1,fp8,fp8,0,0.38122133413950604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,128,1,float16,fp8,0,0.7016106446584066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,128,1,float16,float16,0,0.3564480145772298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,0,1,float16,float16,0,0.3562026818593343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,128,1,float16,fp8,0,0.3563573360443115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,0,1,float16,fp8,0,0.35599998633066815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,0,1,fp8,fp8,0,0.329584002494812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,128,1,float16,float16,0,0.3556906779607137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,0,1,float16,float16,0,0.3550826708475749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,128,1,float16,fp8,0,0.3535146713256836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,128,1,fp8,fp8,0,0.3337920109430949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,0,1,float16,fp8,0,0.3544960021972656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,0,1,fp8,fp8,0,0.3363306522369385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,128,1,float16,float16,0,0.3595840136210124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,0,1,float16,float16,0,0.3579626480738322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,128,1,float16,float16,0,0.38808000087738037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,128,1,float16,fp8,0,0.3573919932047526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,128,1,fp8,fp8,0,0.332586665948232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,0,1,float16,fp8,0,0.35596799850463867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,0,1,fp8,fp8,0,0.33193065722783405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,128,1,float16,float16,0,0.361135999361674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,0,1,float16,float16,0,0.36130134264628094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,128,1,float16,fp8,0,0.3593493302663167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,128,1,fp8,fp8,0,0.3421440124511719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,0,1,float16,fp8,0,0.36025599638621014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,128,1,float16,float16,0,0.2018346587816874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,0,1,float16,float16,0,0.2023893396059672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,128,1,float16,fp8,0,0.19850667317708334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,128,1,fp8,fp8,0,0.20012267430623373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,0,1,float16,fp8,0,0.19937600692113241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,0,1,fp8,fp8,0,0.199018657207489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,128,1,float16,float16,0,0.1865653395652771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,0,1,float16,float16,0,0.18638400236765543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,128,1,float16,fp8,0,0.187008003393809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,128,1,fp8,fp8,0,0.17378133535385132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,0,1,float16,fp8,0,0.18714666366577148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,0,1,fp8,fp8,0,0.17435733477274576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,0,1,fp8,fp8,0,0.3401706616083781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,0,1,float16,float16,0,0.18733332554499307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,128,1,float16,fp8,0,0.18653867642084757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,128,1,fp8,fp8,0,0.17520000537236533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,0,1,float16,fp8,0,0.18660267194112143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,0,1,fp8,fp8,0,0.17452265818913779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,128,1,float16,float16,0,0.18703999121983847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,0,1,float16,float16,0,0.18710933128992716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,128,1,float16,fp8,0,0.18757865826288858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,128,1,fp8,fp8,0,0.17339199781417847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,0,1,float16,fp8,0,0.1869866649309794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,0,1,fp8,fp8,0,0.1739893356959025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,128,1,float16,float16,0,0.18896534045537314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,128,1,float16,float16,0,0.18721065918604532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,128,1,float16,fp8,0,0.1895786722501119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,128,1,fp8,fp8,0,0.3288266658782959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,0,1,float16,fp8,0,0.18980266650517783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,0,1,fp8,fp8,0,0.17645865678787231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,128,1,float16,float16,0,0.10971732934315999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,0,1,float16,float16,0,0.1095306674639384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,128,1,float16,fp8,0,0.10752000411351521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,128,1,fp8,fp8,0,0.11053333679835002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,0,1,float16,fp8,0,0.10748799641927083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,0,1,fp8,fp8,0,0.10973333319028218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,128,1,float16,float16,0,0.09941333532333374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,0,1,float16,float16,0,0.10025599598884583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,128,1,float16,fp8,0,0.0992746651172638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,128,1,fp8,fp8,0,0.0913759966691335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,0,1,float16,fp8,0,0.09916266798973083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,0,1,fp8,fp8,0,0.09249066313107808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,128,1,fp8,fp8,0,0.17737066745758057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,0,1,float16,float16,0,0.10010133186976115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,128,1,float16,fp8,0,0.10025599598884583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,128,1,fp8,fp8,0,0.09330667058626811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,0,1,float16,fp8,0,0.09931199749310811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,0,1,fp8,fp8,0,0.09283733367919922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,128,1,float16,float16,0,0.10110400120417277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,0,1,float16,float16,0,0.10132267077763875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,128,1,float16,fp8,0,0.09994133313496907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,0,1,float16,float16,0,0.18925867478052774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,128,1,fp8,fp8,0,0.09321600198745728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,0,1,float16,fp8,0,0.10054933031400044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,0,1,fp8,fp8,0,0.09380267063776652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,128,1,float16,float16,0,0.1011893351872762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,0,1,float16,float16,0,0.10128000378608704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,128,1,float16,fp8,0,0.1013813316822052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,128,1,fp8,fp8,0,0.09587732950846355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,0,1,float16,fp8,0,0.09954667091369629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,0,1,fp8,fp8,0,0.09590933720270793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,128,1,float16,float16,0,0.06215466558933258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,0,1,float16,float16,0,0.06229333579540253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,128,1,float16,fp8,0,0.06234133243560791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,128,1,fp8,fp8,0,0.06417599817117055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,0,1,float16,fp8,0,0.06158933540185293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,0,1,fp8,fp8,0,0.06437866886456807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,128,1,float16,float16,0,0.05808533231417338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,0,1,float16,float16,0,0.05815466741720835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,128,1,float16,fp8,0,0.05806933343410492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,128,1,fp8,fp8,0,0.05493866900602976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,0,1,float16,fp8,0,0.05820266902446747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,0,1,fp8,fp8,0,0.054192001620928444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,128,1,float16,float16,0,0.057999998331069946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,0,1,float16,float16,0,0.058042665322621666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,128,1,fp8,fp8,0,0.05628266433874766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,0,1,float16,fp8,0,0.057850668827692665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,0,1,fp8,fp8,0,0.05620799958705902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,128,1,float16,float16,0,0.05809600154558817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,0,1,float16,float16,0,0.058703998724619545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,128,1,float16,fp8,0,0.05820266902446747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,128,1,fp8,fp8,0,0.056159997979799904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,0,1,float16,fp8,0,0.05879466732343038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,0,1,fp8,fp8,0,0.05463466544946035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,128,1,float16,float16,0,0.058677335580190025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,0,1,float16,float16,0,0.05806933343410492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,128,1,float16,fp8,0,0.05807999769846598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,128,1,fp8,fp8,0,0.05584533512592316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,0,1,float16,fp8,0,0.058149332801500954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,0,1,fp8,fp8,0,0.05622399846712748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,128,1,float16,float16,0,0.03984000037113825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,0,1,float16,float16,0,0.03949866692225138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,128,1,float16,fp8,0,0.039877332746982574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,128,1,fp8,fp8,0,0.038015998899936676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,0,1,float16,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,0,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,128,1,float16,float16,0,0.037903999288876854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,0,1,float16,float16,0,0.03745600084463755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,128,1,float16,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,128,1,fp8,fp8,0,0.03579200059175491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,0,1,float16,fp8,0,0.03758399933576584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,0,1,fp8,fp8,0,0.036288000643253326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,128,1,float16,float16,0,0.03761066744724909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,0,1,float16,float16,0,0.037392000357309975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,128,1,float16,fp8,0,0.03804266701141993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,128,1,fp8,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,0,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,0,1,fp8,fp8,0,0.03569599986076355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,128,1,float16,float16,0,0.037477334340413414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,0,1,float16,float16,0,0.0377866675456365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,128,1,float16,fp8,0,0.037471999724706016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,128,1,fp8,fp8,0,0.03532266616821289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,0,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,0,1,fp8,fp8,0,0.035205334424972534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,128,1,float16,float16,0,0.03761066744724909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,0,1,float16,float16,0,0.03751466671625773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,128,1,float16,fp8,0,0.037818667789300285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,128,1,fp8,fp8,0,0.0365226666132609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,0,1,fp8,fp8,0,0.03703466554482778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,128,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,0,1,float16,float16,0,0.025472000241279602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,128,1,float16,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,0,1,float16,fp8,0,0.026549334327379864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,0,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,128,1,float16,float16,0,0.025221332907676697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,0,1,float16,float16,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,0,1,float16,fp8,0,0.03825599948565165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,128,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,128,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,0,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,128,1,float16,float16,0,0.10027199983596802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,128,1,float16,float16,0,0.02537599951028824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,0,1,float16,float16,0,0.025648000339667004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,128,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,128,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,0,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,128,1,float16,fp8,0,0.05936000247796377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,128,1,float16,float16,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,0,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,128,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,128,1,fp8,fp8,0,0.02350933353106181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,0,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,0,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,128,1,float16,float16,0,0.025093334416548412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,0,1,float16,float16,0,0.025066666305065155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,128,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,128,1,float16,float16,0,0.018794666975736618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,0,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,0,1,float16,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,128,1,float16,float16,0,0.018373332917690277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,0,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,128,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,128,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,0,1,float16,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,0,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,128,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,0,1,float16,float16,0,0.018181333939234417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,128,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,128,1,fp8,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,0,1,float16,fp8,0,0.019861333072185516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,128,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,128,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,128,1,float16,float16,0,0.018911999960740406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,128,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,128,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,0,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,0,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,0,1,float16,fp8,0,0.0176959993938605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,0,1,fp8,fp8,0,0.017722666263580322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,128,1,float16,float16,0,0.017653333644072216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,0,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,0,1,float16,float16,0,0.018101333330074947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,128,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,128,1,float16,float16,0,0.5074079831441244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,0,1,float16,float16,0,0.5068106651306152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,128,1,float16,fp8,0,0.5050773223241171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,128,1,fp8,fp8,0,0.4538293282190959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,0,1,float16,fp8,0,0.5061920086542765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,0,1,fp8,fp8,0,0.4525973399480184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,128,1,float16,float16,0,0.5050186713536581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,0,1,float16,float16,0,0.5045599937438965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,128,1,float16,fp8,0,0.504202683766683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,128,1,fp8,fp8,0,0.4562133153279622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,128,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,0,1,fp8,fp8,0,0.45741868019104004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,128,1,float16,float16,0,0.5039413372675577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,0,1,float16,float16,0,0.5057493448257446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,128,1,float16,fp8,0,0.5021599928538004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,128,1,fp8,fp8,0,0.4524480104446411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,0,1,float16,fp8,0,0.5029866695404053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,0,1,fp8,fp8,0,0.4525066614151001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,0,1,float16,fp8,0,0.5036906798680624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,128,1,float16,float16,0,0.5087786515553793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,0,1,float16,float16,0,0.5086506605148315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,128,1,float16,fp8,0,0.5079040129979452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,128,1,fp8,fp8,0,0.4612693389256795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,0,1,float16,fp8,0,0.506389339764913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,0,1,fp8,fp8,0,0.46247466405232746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,128,1,float16,float16,0,0.27478400866190594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,0,1,float16,float16,0,0.2748533288637797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,128,1,float16,fp8,0,0.2717439929644267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,128,1,fp8,fp8,0,0.259226659933726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,0,1,float16,fp8,0,0.2723413308461507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,0,1,fp8,fp8,0,0.2616479992866516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,128,1,float16,float16,0,0.26108266909917194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,0,1,float16,float16,0,0.2607146700223287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,128,1,float16,fp8,0,0.26023467381795246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,128,1,fp8,fp8,0,0.23468265930811563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,0,1,float16,fp8,0,0.2609440088272095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,0,1,fp8,fp8,0,0.2342026631037394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,128,1,float16,float16,0,0.2597866654396057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,0,1,float16,float16,0,0.260154664516449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,128,1,float16,fp8,0,0.2603306571642558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,128,1,fp8,fp8,0,0.23569599787394205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,0,1,float16,fp8,0,0.25962666670481366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,0,1,fp8,fp8,0,0.23669334252675375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,0,1,float16,fp8,0,0.01858666663368543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,0,1,float16,float16,0,0.260970671971639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,128,1,float16,fp8,0,0.26020266612370807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,128,1,fp8,fp8,0,0.2353066603342692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,0,1,float16,fp8,0,0.2606400052706401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,0,1,fp8,fp8,0,0.2334346572558085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,128,1,float16,float16,0,0.26258667310078937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,0,1,float16,float16,0,0.26126400629679364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,128,1,float16,fp8,0,0.2600213289260864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,128,1,fp8,fp8,0,0.23774933815002441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,128,1,float16,float16,0,0.26055999596913654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,0,1,fp8,fp8,0,0.2385173241297404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,0,1,float16,float16,0,0.1455680032571157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,128,1,fp8,fp8,0,0.1400373379389445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,0,1,float16,fp8,0,0.14351466298103333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,0,1,fp8,fp8,0,0.13822399576505026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,128,1,float16,float16,0,0.13461333513259888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,0,1,float16,fp8,0,0.260970671971639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,128,1,float16,float16,0,0.14386666814486185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,128,1,fp8,fp8,0,0.12179199854532878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,128,1,float16,fp8,0,0.14437333742777506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,0,1,fp8,fp8,0,0.12231999635696411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,128,1,float16,float16,0,0.13715199629465738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,0,1,float16,float16,0,0.1370560030142466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,0,1,float16,float16,0,0.13571199774742126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,128,1,float16,fp8,0,0.13659733533859253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,128,1,float16,fp8,0,0.136245330174764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,0,1,float16,fp8,0,0.13620266318321228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,0,1,fp8,fp8,0,0.12359999616940816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,128,1,float16,float16,0,0.13673599561055502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,128,1,float16,fp8,0,0.135861337184906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,128,1,fp8,fp8,0,0.12450133760770161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,0,1,float16,fp8,0,0.13636266191800436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,0,1,fp8,fp8,0,0.1241386632124583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,128,1,fp8,fp8,0,0.12326932946840923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,0,1,float16,float16,0,0.13703999916712442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,0,1,float16,fp8,0,0.13455999890963236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,128,1,fp8,fp8,0,0.12704533338546753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,0,1,float16,fp8,0,0.13697600364685059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,0,1,fp8,fp8,0,0.12558399637540182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,128,1,float16,float16,0,0.07913066446781158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,0,1,float16,float16,0,0.0809440016746521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,128,1,float16,float16,0,0.13711999853452048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,128,1,fp8,fp8,0,0.07899199922879536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,0,1,float16,fp8,0,0.07916800181070964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,128,1,float16,fp8,0,0.13622933626174927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,0,1,fp8,fp8,0,0.07859200239181519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,128,1,float16,float16,0,0.07539199789365132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,0,1,float16,float16,0,0.07700266440709432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,128,1,float16,fp8,0,0.07586133480072021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,128,1,fp8,fp8,0,0.07062399884064992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,0,1,float16,fp8,0,0.07638933261235555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,0,1,fp8,fp8,0,0.07076266904671986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,128,1,float16,float16,0,0.07669333120187123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,0,1,float16,float16,0,0.07619200150171916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,128,1,float16,fp8,0,0.0763626645008723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,128,1,fp8,fp8,0,0.07065600156784058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,0,1,float16,fp8,0,0.07650133470694225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,128,1,float16,fp8,0,0.08062933385372162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,128,1,float16,float16,0,0.0765173335870107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,0,1,float16,float16,0,0.07701866825421651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,128,1,float16,fp8,0,0.07542933523654938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,0,1,float16,float16,0,0.13554666439692178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,0,1,float16,fp8,0,0.07500799993673961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,0,1,fp8,fp8,0,0.0703306645154953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,128,1,float16,float16,0,0.07496533294518788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,0,1,float16,float16,0,0.07603733241558075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,128,1,float16,fp8,0,0.07462400197982788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,128,1,fp8,fp8,0,0.07134399811426799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,0,1,float16,fp8,0,0.07647466659545898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,0,1,fp8,fp8,0,0.07201600074768066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,128,1,float16,float16,0,0.0462666650613149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,0,1,float16,float16,0,0.04794133206208547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,128,1,float16,fp8,0,0.04595733185609182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,128,1,fp8,fp8,0,0.045514668027559914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,0,1,float16,fp8,0,0.04574933151404063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,0,1,fp8,fp8,0,0.045941332976023354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,128,1,float16,float16,0,0.045738667249679565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,0,1,float16,float16,0,0.04586666822433472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,128,1,float16,fp8,0,0.046021332343419395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,128,1,fp8,fp8,0,0.04201066493988037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,0,1,float16,fp8,0,0.04567466676235199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,0,1,fp8,fp8,0,0.04281599819660187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,128,1,float16,float16,0,0.045968001087506614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,0,1,float16,float16,0,0.04654933512210846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,128,1,float16,fp8,0,0.04413333535194397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,128,1,fp8,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,0,1,float16,fp8,0,0.044293334086736046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,0,1,fp8,fp8,0,0.04369066655635834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,128,1,float16,float16,0,0.04428266485532125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,0,1,float16,float16,0,0.04586133360862732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,128,1,float16,fp8,0,0.045610666275024414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,128,1,fp8,fp8,0,0.042208001017570496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,0,1,float16,fp8,0,0.04569066564242045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,0,1,fp8,fp8,0,0.04359999795754751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,128,1,float16,float16,0,0.045834665497144066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,0,1,float16,float16,0,0.04601066807905833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,128,1,float16,fp8,0,0.044666667779286705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,128,1,fp8,fp8,0,0.0432586669921875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,0,1,float16,fp8,0,0.04586666822433472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,0,1,fp8,fp8,0,0.043696001172065735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,128,1,float16,float16,0,0.029466666281223297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,0,1,float16,float16,0,0.029765332738558452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,128,1,float16,fp8,0,0.030271999537944794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,0,1,fp8,fp8,0,0.07125333448251088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,128,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,0,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,0,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,128,1,float16,float16,0,0.029818666477998097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,0,1,float16,float16,0,0.03009066730737686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,128,1,float16,fp8,0,0.029232000311215717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,128,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,0,1,float16,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,128,1,float16,float16,0,0.029264000554879505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,0,1,float16,float16,0,0.029391999046007793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,128,1,float16,fp8,0,0.02985599885384242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,128,1,fp8,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,0,1,float16,fp8,0,0.030645333230495453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,0,1,fp8,fp8,0,0.028207999964555103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,128,1,float16,float16,0,0.030293333033720653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,0,1,float16,float16,0,0.029285334050655365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,128,1,float16,fp8,0,0.03130666663249334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,128,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,0,1,fp8,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,128,1,float16,float16,0,0.03181866556406021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,128,1,float16,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,128,1,fp8,fp8,0,0.029167999823888142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,0,1,float16,fp8,0,0.029557332396507263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,128,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,128,1,float16,fp8,0,0.02274666726589203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,128,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,0,1,float16,fp8,0,0.022661333282788593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,128,1,float16,float16,0,0.02144533395767212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,128,1,fp8,fp8,0,0.020421333611011505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,0,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,0,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,0,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,0,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,128,1,float16,float16,0,0.02145066608985265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,0,1,float16,float16,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,128,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,128,1,float16,float16,0,0.02149333308140437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,128,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,0,1,float16,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,128,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,128,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,0,1,float16,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,128,1,float16,float16,0,0.015573333948850632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,128,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,0,1,fp8,fp8,0,0.01584533353646596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,128,1,float16,float16,0,0.4057706594467163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,128,1,float16,fp8,0,0.40540798505147296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,128,1,fp8,fp8,0,0.3619680007298787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,0,1,fp8,fp8,0,0.36109864711761475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,128,1,float16,float16,0,0.40655465920766193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,0,1,float16,float16,0,0.40597331523895264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,128,1,float16,fp8,0,0.4037439823150635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,128,1,fp8,fp8,0,0.3633973201115926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,0,1,float16,fp8,0,0.4035733143488566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,128,1,float16,float16,0,0.40513598918914795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,0,1,float16,float16,0,0.406277338663737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,0,1,float16,float16,0,0.4047413269678752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,128,1,float16,fp8,0,0.4044640064239502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,128,1,fp8,fp8,0,0.3598506848017375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,0,1,fp8,fp8,0,0.36351998647054035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,0,1,fp8,fp8,0,0.361407995223999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,128,1,float16,float16,0,0.4060800075531006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,0,1,float16,float16,0,0.40596266587575275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,128,1,float16,fp8,0,0.4041653474171956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,128,1,fp8,fp8,0,0.36586666107177734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,0,1,float16,fp8,0,0.4052746693293254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,128,1,float16,float16,0,0.2159093419710795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,0,1,float16,fp8,0,0.40516801675160724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,0,1,float16,float16,0,0.21611199776331583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,128,1,float16,fp8,0,0.21598400672276816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,128,1,fp8,fp8,0,0.20179200172424316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,0,1,float16,fp8,0,0.21544533967971802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,0,1,fp8,fp8,0,0.2011893391609192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,128,1,float16,float16,0,0.20801599820454916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,0,1,float16,float16,0,0.2084533373514811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,128,1,float16,fp8,0,0.20777599016825357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,128,1,fp8,fp8,0,0.1858560045560201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,0,1,float16,fp8,0,0.20822399854660034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,0,1,fp8,fp8,0,0.1853920022646586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,0,1,float16,fp8,0,0.40440531571706134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,0,1,float16,float16,0,0.20781866709391275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,128,1,float16,fp8,0,0.20758400360743204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,128,1,fp8,fp8,0,0.185370663801829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,0,1,float16,fp8,0,0.2076639930407206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,0,1,fp8,fp8,0,0.1852746605873108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,128,1,float16,float16,0,0.20851200819015503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,0,1,float16,float16,0,0.20836800336837769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,128,1,float16,fp8,0,0.20747200647989908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,128,1,fp8,fp8,0,0.1876159906387329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,0,1,float16,fp8,0,0.20785067478815714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,0,1,fp8,fp8,0,0.18730133771896362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,128,1,float16,float16,0,0.20798933506011963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,0,1,float16,float16,0,0.21036799748738608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,128,1,float16,fp8,0,0.20972800254821777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,128,1,fp8,fp8,0,0.18830400705337524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,0,1,float16,fp8,0,0.20868800083796182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,0,1,fp8,fp8,0,0.189082662264506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,128,1,float16,float16,0,0.11482666929562886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,0,1,float16,float16,0,0.1136853297551473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,128,1,float16,fp8,0,0.11337066690127055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,128,1,fp8,fp8,0,0.10934933026631673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,0,1,float16,fp8,0,0.11552533507347107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,0,1,fp8,fp8,0,0.10984533031781514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,128,1,float16,float16,0,0.11178666353225708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,0,1,fp8,fp8,0,0.3646186590194702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,128,1,float16,float16,0,0.20973867177963257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,128,1,fp8,fp8,0,0.10102933645248413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,0,1,float16,fp8,0,0.11152000228563945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,0,1,fp8,fp8,0,0.10116799672444661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,128,1,float16,float16,0,0.11180266737937927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,0,1,float16,float16,0,0.11165866255760193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,128,1,float16,fp8,0,0.11041067043940227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,128,1,fp8,fp8,0,0.10156266887982686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,0,1,float16,fp8,0,0.11030933260917664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,0,1,fp8,fp8,0,0.10130133231480916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,128,1,float16,float16,0,0.11154666543006897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,0,1,float16,float16,0,0.11175466577212016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,128,1,float16,fp8,0,0.10990400115648906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,128,1,float16,fp8,0,0.11247467001279195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,128,1,fp8,fp8,0,0.10104533036549886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,0,1,float16,fp8,0,0.11136533816655476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,0,1,fp8,fp8,0,0.10095466176668803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,128,1,float16,float16,0,0.11150933305422465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,0,1,float16,float16,0,0.11137066284815471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,128,1,float16,fp8,0,0.1114026705423991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,128,1,fp8,fp8,0,0.10331733028093974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,0,1,float16,fp8,0,0.11142933368682861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,0,1,fp8,fp8,0,0.10339732964833577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,128,1,float16,float16,0,0.06579199930032094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,0,1,float16,float16,0,0.06440000236034393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,128,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,128,1,fp8,fp8,0,0.06205333272616068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,0,1,float16,fp8,0,0.06436799963315327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,0,1,fp8,fp8,0,0.060122668743133545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,128,1,float16,float16,0,0.062362665931383766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,0,1,float16,float16,0,0.0621066689491272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,128,1,float16,fp8,0,0.0633653352657954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,128,1,fp8,fp8,0,0.05792533357938131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,0,1,float16,fp8,0,0.06460266808668773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,0,1,fp8,fp8,0,0.05850133299827576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,128,1,float16,float16,0,0.06433066725730896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,0,1,float16,float16,0,0.06373866895834605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,128,1,float16,fp8,0,0.06429333488146464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,128,1,fp8,fp8,0,0.058677335580190025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,0,1,float16,fp8,0,0.06432533264160156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,0,1,fp8,fp8,0,0.05788266658782959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,128,1,float16,float16,0,0.06390400230884552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,128,1,float16,fp8,0,0.06237333516279856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,128,1,fp8,fp8,0,0.05852800110975901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,0,1,float16,fp8,0,0.06413333117961884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,0,1,fp8,fp8,0,0.05884799857934316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,128,1,float16,float16,0,0.06414400041103363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,0,1,float16,float16,0,0.06282666822274525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,128,1,float16,fp8,0,0.06407999992370605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,128,1,fp8,fp8,0,0.06004266440868378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,0,1,float16,fp8,0,0.06409066418806712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,0,1,fp8,fp8,0,0.06010666489601135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,128,1,float16,float16,0,0.037477334340413414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,0,1,float16,float16,0,0.03807999938726425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,128,1,float16,fp8,0,0.03812800099452337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,128,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,0,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,0,1,fp8,fp8,0,0.0378560001651446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,128,1,float16,float16,0,0.03757333258787791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,0,1,float16,float16,0,0.03782399992148081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,128,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,128,1,fp8,fp8,0,0.03745600084463755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,0,1,float16,fp8,0,0.03791466603676478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,0,1,fp8,fp8,0,0.03736000011364619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,128,1,float16,float16,0,0.03757333258787791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,0,1,float16,float16,0,0.0379573330283165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,128,1,float16,fp8,0,0.03738666574160258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,128,1,fp8,fp8,0,0.03573333223660787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,0,1,float16,fp8,0,0.03746666759252548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,0,1,fp8,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,128,1,float16,float16,0,0.03757333258787791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,0,1,float16,float16,0,0.037589333951473236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,128,1,float16,fp8,0,0.038704000413417816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,128,1,fp8,fp8,0,0.03738666574160258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,0,1,float16,fp8,0,0.03806933263937632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,0,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,128,1,float16,float16,0,0.037471999724706016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,128,1,float16,fp8,0,0.03770133356253306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,128,1,fp8,fp8,0,0.03581333408753077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,0,1,float16,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,0,1,fp8,fp8,0,0.037461332976818085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,128,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,0,1,float16,float16,0,0.027162666122118633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,128,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,128,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,0,1,float16,fp8,0,0.02701333413521449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,0,1,fp8,fp8,0,0.025546667476495106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,128,1,float16,float16,0,0.02533866713444392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,0,1,float16,float16,0,0.027141332626342773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,128,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,128,1,fp8,fp8,0,0.02497066557407379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,0,1,float16,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,0,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,128,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,0,1,float16,float16,0,0.027045334378878277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,128,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,128,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,0,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,0,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,128,1,float16,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,0,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,0,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,128,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,128,1,float16,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,128,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,0,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,0,1,float16,float16,0,0.062368000547091164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,0,1,float16,float16,0,0.11156266927719116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,128,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,0,1,float16,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,128,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,0,1,float16,float16,0,0.03948266555865606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,0,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,128,1,fp8,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,0,1,float16,fp8,0,0.02057066683967908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,128,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,0,1,float16,float16,0,0.019679999599854153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,128,1,fp8,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,0,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,128,1,float16,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,128,1,fp8,fp8,0,0.0200853335360686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,0,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,128,1,float16,float16,0,0.019600000232458115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,128,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,128,1,fp8,fp8,0,0.020714666694402695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,0,1,float16,fp8,0,0.020773333807786305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,0,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,128,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,128,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,128,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,128,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,128,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,128,1,fp8,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,128,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,128,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,0,1,fp8,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,128,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,0,1,float16,float16,0,0.015583999454975128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,128,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,128,1,float16,float16,0,0.35231999556223553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,0,1,float16,float16,0,0.3513013521830241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,128,1,float16,fp8,0,0.3495253324508667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,128,1,fp8,fp8,0,0.31378666559855145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,0,1,float16,fp8,0,0.3512266476949056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,0,1,fp8,fp8,0,0.31426666180292767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,128,1,float16,float16,0,0.3515946865081787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,0,1,float16,float16,0,0.3521493275960286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,128,1,float16,fp8,0,0.3511893351872762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,128,1,fp8,fp8,0,0.3147520025571187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,0,1,float16,fp8,0,0.35153599580128986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,0,1,fp8,fp8,0,0.31269333759943646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,128,1,float16,float16,0,0.3535786469777425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,0,1,float16,float16,0,0.3518773317337036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,128,1,float16,fp8,0,0.35231467088063556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,128,1,fp8,fp8,0,0.3141760031382243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,0,1,fp8,fp8,0,0.3145493268966675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,128,1,float16,float16,0,0.3512586752573649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,0,1,float16,float16,0,0.3531999985376994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,128,1,float16,fp8,0,0.3507946729660034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,128,1,fp8,fp8,0,0.3177066644032796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,0,1,fp8,fp8,0,0.31782400608062744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,128,1,float16,float16,0,0.18532800674438477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,0,1,float16,float16,0,0.18708799282709757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,128,1,float16,fp8,0,0.18411733706792197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,128,1,fp8,fp8,0,0.17311465740203857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,0,1,float16,fp8,0,0.18395199378331503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,0,1,fp8,fp8,0,0.17364799976348877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,128,1,float16,float16,0,0.18105065822601318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,0,1,float16,float16,0,0.18258132537206015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,128,1,float16,fp8,0,0.18099733193715414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,128,1,fp8,fp8,0,0.16596266627311707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,0,1,float16,fp8,0,0.18085867166519165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,0,1,float16,fp8,0,0.35415466626485187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,128,1,float16,float16,0,0.18150933583577475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,0,1,float16,float16,0,0.181002676486969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,0,1,float16,fp8,0,0.35128533840179443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,128,1,fp8,fp8,0,0.16547200083732605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,0,1,float16,fp8,0,0.1811573306719462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,0,1,fp8,fp8,0,0.16583466529846191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,128,1,float16,float16,0,0.18099733193715414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,0,1,float16,float16,0,0.18158400058746338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,128,1,float16,fp8,0,0.18092799186706543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,128,1,fp8,fp8,0,0.1668213407198588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,0,1,float16,fp8,0,0.1820746660232544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,0,1,fp8,fp8,0,0.165994664033254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,128,1,float16,float16,0,0.18214933077494302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,0,1,float16,float16,0,0.1820639967918396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,128,1,float16,fp8,0,0.18126400311787924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,128,1,float16,fp8,0,0.18204265832901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,128,1,fp8,fp8,0,0.16537599762280783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,0,1,float16,fp8,0,0.18144534031550089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,0,1,fp8,fp8,0,0.16686399777730307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,0,1,float16,float16,0,0.09939733147621155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,128,1,float16,fp8,0,0.09935466448465984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,128,1,fp8,fp8,0,0.09116799632708232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,0,1,float16,fp8,0,0.09922132889429729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,0,1,fp8,fp8,0,0.09117866555849712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,128,1,float16,float16,0,0.09744532903035481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,0,1,float16,float16,0,0.09877333045005798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,128,1,float16,fp8,0,0.09718400239944458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,128,1,fp8,fp8,0,0.09079999725023906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,0,1,float16,fp8,0,0.09897067149480183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,0,1,fp8,fp8,0,0.08925867080688477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,128,1,float16,float16,0,0.09760000308354695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,0,1,float16,float16,0,0.09726933638254802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,128,1,float16,fp8,0,0.09715732932090759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,128,1,fp8,fp8,0,0.08907199899355571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,0,1,float16,fp8,0,0.09724266330401103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,0,1,fp8,fp8,0,0.08918399612108867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,128,1,float16,float16,0,0.0992693305015564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,0,1,float16,float16,0,0.09730133414268494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,128,1,float16,fp8,0,0.09895466764767964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,128,1,fp8,fp8,0,0.0909440020720164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,0,1,float16,fp8,0,0.0990666647752126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,0,1,fp8,fp8,0,0.08894933263460796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,128,1,float16,float16,0,0.09827199578285217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,0,1,float16,float16,0,0.09706133604049683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,128,1,float16,fp8,0,0.09730666875839233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,128,1,fp8,fp8,0,0.09078400333722432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,0,1,float16,fp8,0,0.09705600142478943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,0,1,fp8,fp8,0,0.09084266424179077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,128,1,float16,float16,0,0.05593066910902659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,0,1,float16,float16,0,0.05641066531340281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,128,1,float16,fp8,0,0.05630933245023092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,128,1,fp8,fp8,0,0.05416533350944519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,0,1,float16,fp8,0,0.05789866546789805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,0,1,fp8,fp8,0,0.05417599777380625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,128,1,float16,float16,0,0.056133334835370384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,0,1,float16,float16,0,0.05570133527119955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,128,1,float16,fp8,0,0.05611733098824819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,128,1,fp8,fp8,0,0.05243200063705444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,0,1,float16,fp8,0,0.05635733405749003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,0,1,fp8,fp8,0,0.05236800014972687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,128,1,float16,float16,0,0.05589866638183594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,0,1,float16,float16,0,0.055919999877611794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,128,1,float16,fp8,0,0.055888002117474876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,128,1,fp8,fp8,0,0.052042668064435325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,0,1,float16,fp8,0,0.055914665261904396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,0,1,fp8,fp8,0,0.052239999175071716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,128,1,float16,float16,0,0.05592533449331919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,0,1,float16,float16,0,0.05597866574923197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,128,1,float16,fp8,0,0.055786664287249245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,128,1,fp8,fp8,0,0.05376533170541128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,0,1,float16,fp8,0,0.05596800148487091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,0,1,fp8,fp8,0,0.053632001082102455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,128,1,float16,float16,0,0.055957332253456116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,0,1,fp8,fp8,0,0.16690133015314737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,128,1,float16,fp8,0,0.0563679983218511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,128,1,fp8,fp8,0,0.05213333169619242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,0,1,float16,fp8,0,0.05624000231424967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,128,1,float16,float16,0,0.09922666351000468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,128,1,float16,float16,0,0.033770665526390076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,0,1,float16,float16,0,0.033674667278925575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,128,1,float16,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,128,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,0,1,float16,float16,0,0.05598400036493937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,0,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,0,1,fp8,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,128,1,float16,float16,0,0.03533866753180822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,0,1,float16,float16,0,0.035487999518712364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,128,1,float16,fp8,0,0.033488000432650246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,128,1,fp8,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,0,1,float16,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,0,1,fp8,fp8,0,0.03342933456103007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,128,1,float16,float16,0,0.035429333647092186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,128,1,float16,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,128,1,fp8,fp8,0,0.03162666658560435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,0,1,float16,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,0,1,fp8,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,128,1,float16,float16,0,0.03334933271010717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,0,1,float16,float16,0,0.033370666205883026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,128,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,0,1,float16,fp8,0,0.035802667339642845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,128,1,float16,float16,0,0.03568533311287562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,0,1,float16,float16,0,0.033728001018365227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,128,1,float16,fp8,0,0.033610666791598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,128,1,fp8,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,0,1,float16,fp8,0,0.035349334279696144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,0,1,fp8,fp8,0,0.03342933456103007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,128,1,float16,float16,0,0.025146665672461193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,0,1,float16,float16,0,0.024858665963013966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,128,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,128,1,fp8,fp8,0,0.03386666625738144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,0,1,float16,fp8,0,0.026000000536441803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,0,1,fp8,fp8,0,0.023904000719388325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,128,1,float16,float16,0,0.024698667228221893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,0,1,fp8,fp8,0,0.05228800078233083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,0,1,float16,float16,0,0.024847999215126038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,128,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,128,1,fp8,fp8,0,0.023978665471076965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,0,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,0,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,128,1,float16,float16,0,0.025466665625572205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,0,1,float16,float16,0,0.025850666066010792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,128,1,float16,fp8,0,0.02553066611289978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,128,1,fp8,fp8,0,0.024005333582560223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,0,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,0,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,0,1,float16,float16,0,0.025962665677070618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,128,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,128,1,fp8,fp8,0,0.0245919997493426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,0,1,float16,fp8,0,0.025594666600227356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,0,1,fp8,fp8,0,0.023786666492621105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,128,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,0,1,float16,float16,0,0.025546667476495106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,128,1,float16,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,128,1,fp8,fp8,0,0.025701334079106648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,128,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,0,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,128,1,float16,float16,0,0.0195573332409064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,0,1,float16,float16,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,128,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,128,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,128,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,128,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,128,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,128,1,float16,float16,0,0.01959466685851415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,128,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,128,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,0,1,fp8,fp8,0,0.03162666658560435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,0,1,float16,float16,0,0.018895999838908512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,128,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,128,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,0,1,float16,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,128,1,float16,float16,0,0.025050667424996693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,128,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,128,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,128,1,float16,float16,0,0.01569066693385442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,0,1,float16,float16,0,0.015781333049138386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,128,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,128,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,0,1,float16,float16,0,0.016021333634853363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,0,1,float16,float16,0,0.016293333222468693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,128,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,0,0.2996586759885152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,0,0.29810667037963867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,0,0.29993067185084027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,128,1,fp8,fp8,0,0.27000532547632855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,0,0.2996053298314412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,64,0,1,fp8,fp8,0,0.2704373399416606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,0,0.2999573349952698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,0,0.2990453243255615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,0,0.29993067185084027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,128,1,fp8,fp8,0,0.271504004796346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,0,0.29945600032806396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,64,0,1,fp8,fp8,0,0.27135467529296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,0,0.3001919984817505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,0,0.29817066589991253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,0,0.29848533868789673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,128,1,fp8,fp8,0,0.271232008934021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,0,0.29917333523432416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,64,0,1,fp8,fp8,0,0.26971733570098877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,0,0.29958399136861164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,0,0.3002293308575948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,128,1,fp8,fp8,0,0.27128533522288006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,0,1,float16,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,0,1,fp8,fp8,0,0.2714080015818278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,128,1,float16,float16,0,0.1566986640294393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,128,1,float16,fp8,0,0.15652799606323242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,128,1,fp8,fp8,0,0.14220266540845236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,0,0.15639467040697733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,0,1,fp8,fp8,0,0.1425493359565735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,0,0.15680533647537231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,0,0.1546880006790161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,0,0.15654399991035461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,0,0.15451733271280924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,128,1,fp8,fp8,0,0.1420906682809194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,0,0.15573867162068686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,64,0,1,fp8,fp8,0,0.1421333352724711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,0,0.15465600291887918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,0,0.15651200215021768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,0,0.15666666626930237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,128,1,fp8,fp8,0,0.14223466316858926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,0,0.15651200215021768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,64,0,1,fp8,fp8,0,0.14250666896502176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,0,0.15621333320935568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,0,0.15648000439008078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,0,0.15656532843907675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,128,1,fp8,fp8,0,0.1423413356145223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,0,0.2998986641565959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,0,0.15501866738001505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,64,0,1,fp8,fp8,0,0.1423893372217814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,0,0.15542399883270264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,0,0.15653866529464722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,0,0.15666666626930237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,0,0.30029867092768353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,0,0.15639999508857727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,0,1,fp8,fp8,0,0.14242666959762573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,0,0.08538132905960083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,128,1,float16,fp8,0,0.08514666557312012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,128,1,fp8,fp8,0,0.07674666742483775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,0,0.08521067102750142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,0,1,fp8,fp8,0,0.07896000146865845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,64,128,1,fp8,fp8,0,0.14230933785438538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,0,0.08493333061536153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,64,128,1,float16,float16,0,0.08520533641179402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,0,0.0848533312479655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,128,1,fp8,fp8,0,0.07718400160471599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,0,1,fp8,fp8,0,0.07884266475836436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,0,0.084714670976003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,0,0.08514666557312012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,0,0.08486933509508769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,128,1,fp8,fp8,0,0.07675733168919881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,0,0.08482133348782857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,64,0,1,fp8,fp8,0,0.07637866834799449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,0,0.08509866396586101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,0,0.08457600076993306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,0,0.08513066172599792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,0,0.0851039985815684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,0,0.08473599950472514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,0,1,fp8,fp8,0,0.07682666679223378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,0,0.08482666810353597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,0,0.08495466907819112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,0,0.08453333377838135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,0,0.08558932940165202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,0,0.0851146678129832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,0,1,fp8,fp8,0,0.07913066446781158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,0,0.05030933519204458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,128,1,float16,fp8,0,0.050517335534095764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,128,1,fp8,fp8,0,0.04794133206208547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,64,128,1,fp8,fp8,0,0.07683200140794118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,0,0.05134933193524679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,0,1,fp8,fp8,0,0.047685335079828896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,0,0.05073600014050802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,0,0.049738665421803795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,0,0.05046399931112925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,64,128,1,fp8,fp8,0,0.07899199922879536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,0,1,fp8,fp8,0,0.04781866570313772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,64,128,1,float16,float16,0,0.049957334995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,0,0.04995200037956238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,0,0.050111999114354454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,0,0.05005866785844167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,128,1,fp8,fp8,0,0.04784533381462097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,64,0,1,fp8,fp8,0,0.04754666487375895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,0,0.049829334020614624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,0,0.05000533163547516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,0,0.050250664353370667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,128,1,fp8,fp8,0,0.045935998360315956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,0,0.05117333432038625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,64,0,1,fp8,fp8,0,0.047770669062932335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,0,0.050474668542544045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,0,0.050474668542544045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,0,0.050016000866889954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,128,1,fp8,fp8,0,0.04580266773700714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,0,0.050111999114354454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,0,1,fp8,fp8,0,0.046112000942230225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,128,1,float16,float16,0,0.032170665760835014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,0,0.03172266731659571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,128,1,float16,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,128,1,fp8,fp8,0,0.02994133283694585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,64,0,1,fp8,fp8,0,0.031189332405726116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,0,0.03331200033426285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,0,0.03169599920511246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,0,0.032101333141326904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,128,1,fp8,fp8,0,0.031157332162062328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,0,0.03187733391920725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,64,0,1,fp8,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,0,0.03193599979082743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,0,0.03141333411137263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,0,0.03180799881617228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,128,1,fp8,fp8,0,0.029306667546431225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,0,0.03323200096686681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,64,0,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,0,0.033002667129039764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,128,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,0,0.031680000325044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,64,0,1,fp8,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,0,0.03373866776625315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,0,0.03151999910672506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,128,1,fp8,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,64,0,1,fp8,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,128,1,float16,float16,0,0.025285333395004272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,64,128,1,fp8,fp8,0,0.0462773342927297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,0,0.02346666653951009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,128,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,128,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,0,0.024288001159826916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,64,0,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,0,0.02443733314673106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,128,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,0,0.024373332659403484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,64,0,1,fp8,fp8,0,0.02362666775782903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,0,0.025429333249727886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,128,1,fp8,fp8,0,0.023717333873112995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,64,0,1,fp8,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,0,0.023770667612552643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,128,1,fp8,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,0,1,fp8,fp8,0,0.023541333774725597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,0,0.02350933353106181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,0,0.023845332364241283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,128,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,0,0.025519999365011852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,64,0,1,fp8,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,0,0.051072001457214355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,128,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,0,1,fp8,fp8,0,0.01803733284274737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,0,0.019648000597953796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,0,0.018874666343132656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,128,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,64,0,1,fp8,fp8,0,0.017935999979575474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,64,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,0,0.0179626668492953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,128,1,float16,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,0,0.016762666404247284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,64,0,1,fp8,fp8,0,0.016522667060295742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,0,0.0199946661790212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,128,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,0,0.018197332819302876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,128,1,fp8,fp8,0,0.016000000139077503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,64,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,0,0.015669333438078564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,128,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,0,0.01632533346613248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,float16,0,2.673151969909668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,fp8,0,2.6899627049764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,128,1,fp8,fp8,0,2.401327927907308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,float16,0,2.693413416544596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,float16,0,16.483807881673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,0,1,fp8,fp8,0,14.95236841837565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,fp8,0,2.714837392171224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,fp8,0,16.47372309366862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,128,1,fp8,fp8,0,2.426757335662842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,float16,0,16.51852289835612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,float16,0,2.730997403462728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,fp8,0,2.7500267028808594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,128,1,fp8,fp8,0,2.4725546836853027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,0,1,fp8,fp8,0,14.980757395426432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,fp8,0,16.531956990559895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,float16,0,1.5588800112406414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,float16,0,16.593332926432293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,fp8,0,1.589962641398112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,128,1,fp8,fp8,0,1.4558933575948079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,float16,0,8.568293253580729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,fp8,0,16.563610076904297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,float16,0,1.3847999572753906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,0,1,fp8,fp8,0,15.050576527913412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,0,1,fp8,fp8,0,7.807146708170573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,fp8,0,1.395583947499593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,128,1,fp8,fp8,0,1.2473866939544678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,fp8,0,8.611210505167643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,float16,0,8.330133438110352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,float16,0,1.3924907048543294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,fp8,0,1.4068053563435872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,128,1,fp8,fp8,0,1.2573440074920654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,fp8,0,8.35378646850586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,0,1,fp8,fp8,0,7.60151481628418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,float16,0,1.408463954925537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,float16,0,8.375930786132812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,fp8,0,1.4210453033447266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,128,1,fp8,fp8,0,1.2757386366526287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,fp8,0,8.378549575805664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,0,1,fp8,fp8,0,7.594559987386067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,float16,0,0.8516533374786377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,float16,0,8.38429323832194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,fp8,0,0.869493325551351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,128,1,fp8,fp8,0,0.8057226339975992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,float16,0,4.409536043802897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,0,1,fp8,fp8,0,7.617498397827148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,float16,0,0.7740426858266195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,fp8,0,4.429253260294597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,fp8,0,8.420415878295898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,fp8,0,0.7757546901702881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,128,1,fp8,fp8,0,0.7040212949117025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,float16,0,4.310544013977051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,0,1,fp8,fp8,0,4.026943842569987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,float16,0,0.7755786577860514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,fp8,0,0.7818240324656168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,fp8,0,4.311530749003093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,0,1,fp8,fp8,0,3.9168694814046225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,128,1,fp8,fp8,0,0.709333340326945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,float16,0,0.7837920188903809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,float16,0,4.310880025227864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,fp8,0,0.7896533012390137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,0,1,fp8,fp8,0,3.9208478927612305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,fp8,0,4.316501299540202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,128,1,fp8,fp8,0,0.7164479891459147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,float16,0,0.5960319836934408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,float16,0,4.33026123046875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,fp8,0,0.5970826546351115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,float16,0,2.428938706715902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,fp8,0,4.323295911153157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,fp8,0,2.4295199712117515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,0,1,fp8,fp8,0,3.935365358988444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,float16,0,0.5968533356984457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,fp8,0,0.5972319841384888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,128,1,fp8,fp8,0,0.5542773405710856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,0,1,fp8,fp8,0,2.2104320526123047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,128,1,fp8,fp8,0,0.5547413428624471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,float16,0,0.5988853375116984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,fp8,0,2.4160693486531577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,float16,0,2.4284000396728516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,fp8,0,0.5984853506088257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,float16,0,2.415839989980062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,0,1,fp8,fp8,0,2.2076427141825357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,fp8,0,2.418176015218099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,128,1,fp8,fp8,0,0.5539999802907308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,float16,0,0.5970026652018229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,fp8,0,0.5971359809239706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,128,1,fp8,fp8,0,0.5540159940719604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,float16,0,2.4161279996236167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,0,1,fp8,fp8,0,2.206293265024821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,float16,0,1.9786240259806316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,fp8,0,1.9947412808736165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,0,1,fp8,fp8,0,2.206117312113444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,128,1,fp8,fp8,0,1.7756053606669109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,fp8,0,2.4202613830566406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,float16,0,9.67302385965983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,float16,0,1.9951252937316895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,fp8,0,2.0120533307393393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,fp8,0,9.704197565714518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,128,1,fp8,fp8,0,1.795514742533366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,0,1,fp8,fp8,0,8.79640007019043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,float16,0,9.73033587137858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,float16,0,2.0152533849080405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,fp8,0,2.0331360499064126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,fp8,0,9.725253423055014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,128,1,fp8,fp8,0,1.8231306076049805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,0,1,fp8,fp8,0,8.821242650349935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,float16,0,9.731760025024414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,float16,0,1.1680106321970622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,fp8,0,1.1934293111165364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,128,1,fp8,fp8,0,1.0923306941986084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,fp8,0,9.764080047607422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,0,1,fp8,fp8,0,8.847712198893229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,float16,0,5.088645299275716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,float16,0,1.0408426920572917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,fp8,0,1.0479946931203206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,fp8,0,5.121237436930339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,128,1,fp8,fp8,0,0.9372479915618896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,0,1,fp8,fp8,0,4.642725308736165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,float16,0,4.946842511494954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,float16,0,1.042149305343628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,fp8,0,1.055999994277954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,fp8,0,4.950576146443685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,0,1,fp8,fp8,0,4.48795731862386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,float16,0,4.947696050008138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,float16,0,1.05731201171875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,128,1,fp8,fp8,0,0.947376012802124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,0,1,fp8,fp8,0,4.4942827224731445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,fp8,0,1.0692213376363118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,128,1,fp8,fp8,0,0.9602666695912679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,float16,0,4.964138666788737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,fp8,0,4.963333447774251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,float16,0,0.6412533521652222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,fp8,0,4.973973274230957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,0,1,fp8,fp8,0,4.5092159907023115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,128,1,fp8,fp8,0,0.6070026556650797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,float16,0,2.647610664367676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,float16,0,0.583466649055481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,fp8,0,0.6574613253275553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,0,1,fp8,fp8,0,2.423978646596273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,fp8,0,0.5893119970957438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,128,1,fp8,fp8,0,0.5334399938583374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,float16,0,2.5715413093566895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,float16,0,0.5886933406194051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,fp8,0,2.659711996714274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,0,1,fp8,fp8,0,2.3458186785380044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,fp8,0,0.5905493497848511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,128,1,fp8,fp8,0,0.5363893508911133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,float16,0,2.57859738667806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,float16,0,0.592682679494222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,fp8,0,2.5712265968322754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,fp8,0,0.5983200073242188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,128,1,fp8,fp8,0,0.5432480176289877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,0,1,fp8,fp8,0,2.3482453028361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,fp8,0,2.587418715159098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,float16,0,2.5882719357808432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,float16,0,0.453328013420105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,0,1,fp8,fp8,0,2.353679974873861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,fp8,0,0.45368532339731854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,128,1,fp8,fp8,0,0.42045334974924725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,float16,0,1.4910133679707844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,float16,0,0.4516693353652954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,fp8,0,1.4923572540283203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,0,1,fp8,fp8,0,1.364367961883545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,fp8,0,0.4530880053838094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,128,1,fp8,fp8,0,0.42018131415049237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,float16,0,1.4857120513916016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,fp8,0,2.58078940709432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,float16,0,0.4531786839167277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,fp8,0,1.4854186375935872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,fp8,0,0.45205867290496826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,128,1,fp8,fp8,0,0.42049066225687665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,float16,0,1.484282652537028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,float16,0,0.4532853364944458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,fp8,0,1.4862240155537922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,fp8,0,0.45182931423187256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,0,1,fp8,fp8,0,1.3618772824605305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,float16,0,1.4869492848714192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,128,1,fp8,fp8,0,0.42051732540130615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,fp8,0,1.489317258199056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,0,1,fp8,fp8,0,1.3609226544698079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,0,1,fp8,fp8,0,1.3609919548034668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,float16,0,1.6479733784993489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,fp8,0,1.6605067253112793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,128,1,fp8,fp8,0,1.474778652191162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,float16,0,6.965066909790039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,float16,0,1.6567200024922688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,fp8,0,6.967157363891602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,fp8,0,1.670090675354004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,128,1,fp8,fp8,0,1.487493356068929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,0,1,fp8,fp8,0,6.318981170654297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,float16,0,6.97978146870931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,float16,0,1.6751999855041504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,fp8,0,1.6918667157491047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,0,1,fp8,fp8,0,6.32264518737793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,fp8,0,6.993882497151692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,128,1,fp8,fp8,0,1.5124640464782715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,float16,0,0.976085344950358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,float16,0,7.007557551066081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,fp8,0,0.9941813151041666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,float16,0,3.6936801274617515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,0,1,fp8,fp8,0,6.35914675394694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,fp8,0,7.015189488728841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,float16,0,0.869050661722819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,128,1,fp8,fp8,0,0.9120000203450521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,fp8,0,3.716266632080078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,0,1,fp8,fp8,0,3.370544115702311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,fp8,0,0.8764320214589437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,128,1,fp8,fp8,0,0.7858719825744629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,float16,0,0.8753759860992432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,0,1,fp8,fp8,0,3.2379252115885415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,fp8,0,0.8811786969502767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,float16,0,3.5590667724609375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,float16,0,3.5753278732299805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,128,1,fp8,fp8,0,0.7907040119171143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,fp8,0,3.5698560078938804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,float16,0,0.8829013506571451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,fp8,0,0.8923359711964926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,0,1,fp8,fp8,0,3.2466348012288413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,128,1,fp8,fp8,0,0.8032000064849854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,float16,0,3.590847969055176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,float16,0,0.537775993347168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,fp8,0,3.5731414159139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,fp8,0,3.600565274556478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,0,1,fp8,fp8,0,3.2561012903849282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,fp8,0,0.5526293516159058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,128,1,fp8,fp8,0,0.512880007425944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,float16,0,1.9325599670410156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,float16,0,0.4904746611913045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,fp8,0,0.4930773178736369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,128,1,fp8,fp8,0,0.44788801670074463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,fp8,0,1.87337064743042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,fp8,0,1.9414933522542317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,float16,0,1.8692800203959148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,0,1,fp8,fp8,0,1.7055840492248535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,fp8,0,0.4960319995880127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,float16,0,1.8757440249125164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,128,1,fp8,fp8,0,0.4516959985097249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,0,1,fp8,fp8,0,1.7684532801310222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,float16,0,0.4925599892934163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,float16,0,0.49691200256347656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,0,1,fp8,fp8,0,1.7076373100280762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,fp8,0,0.5017866690953573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,128,1,fp8,fp8,0,0.45667731761932373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,float16,0,1.8767733573913574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,float16,0,0.3784960110982259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,fp8,0,1.8825440406799316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,0,1,fp8,fp8,0,1.7137279510498047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,float16,0,1.1077386538187664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,fp8,0,1.8796106974283855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,fp8,0,1.1094026565551758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,0,1,fp8,fp8,0,1.0131466388702393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,float16,0,0.3773599863052368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,fp8,0,0.37750399112701416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,float16,0,1.101685365041097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,fp8,0,0.3781973520914714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,128,1,fp8,fp8,0,0.3521866798400879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,float16,0,0.3776373465855916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,0,1,fp8,fp8,0,1.009050687154134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,fp8,0,0.37755199273427326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,float16,0,1.1022613048553467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,128,1,fp8,fp8,0,0.35070399443308514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,128,1,fp8,fp8,0,0.3531999985376994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,fp8,0,1.103216012318929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,0,1,fp8,fp8,0,1.0109813213348389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,fp8,0,1.1025013128916423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,fp8,0,0.37994666894276935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,128,1,fp8,fp8,0,0.3510986566543579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,fp8,0,1.103061358133952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,0,1,fp8,fp8,0,1.012229363123576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,float16,0,0.3798559904098511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,float16,0,1.1028319994608562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,float16,0,2.600309371948242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,fp8,0,2.620837370554606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,128,1,fp8,fp8,0,2.330736001332601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,float16,0,2.6205973625183105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,float16,0,9.288933436075846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,fp8,0,9.293071746826172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,fp8,0,2.641845385233561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,0,1,fp8,fp8,0,8.414805094401041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,128,1,fp8,fp8,0,2.3550987243652344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,float16,0,9.319482803344727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,float16,0,2.6553492546081543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,fp8,0,9.337984085083008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,0,1,fp8,fp8,0,8.424890518188477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,fp8,0,2.673370679219564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,128,1,fp8,fp8,0,2.4010987281799316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,float16,0,1.4913919766743977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,float16,0,9.368223826090494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,float16,0,4.897733370463054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,0,1,fp8,fp8,0,8.481840133666992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,fp8,0,9.373711903889975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,128,1,fp8,fp8,0,1.3889333407084148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,float16,0,1.31604798634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,fp8,0,4.923760096232097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,0,1,fp8,fp8,0,4.452122688293457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,fp8,0,1.5203946431477864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,fp8,0,1.3267146746317546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,128,1,fp8,fp8,0,1.1777280171712239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,float16,0,4.686736106872559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,float16,0,1.3233173688252766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,fp8,0,1.333877404530843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,0,1,fp8,fp8,0,4.244138717651367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,fp8,0,4.698671976725261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,128,1,fp8,fp8,0,1.1876373291015625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,float16,0,4.699647903442383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,float16,0,1.3394880294799805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,fp8,0,1.3521547317504883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,0,1,fp8,fp8,0,4.257589340209961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,fp8,0,4.706645329793294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,128,1,fp8,fp8,0,1.2078186670939128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,float16,0,4.722288131713867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,float16,0,0.7812480131785074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,fp8,0,0.798911968866984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,128,1,fp8,fp8,0,0.7347893714904785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,0,1,fp8,fp8,0,4.274117469787598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,fp8,0,4.740832010904948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,float16,0,0.7002399762471517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,fp8,0,2.524511973063151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,float16,0,2.5097333590189614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,fp8,0,0.7038133144378662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,float16,0,2.414720058441162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,128,1,fp8,fp8,0,0.6334666808446249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,float16,0,0.7028906345367432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,0,1,fp8,fp8,0,2.29258139928182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,fp8,0,2.412970701853434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,0,1,fp8,fp8,0,2.1921866734822593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,fp8,0,0.7094346682230631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,float16,0,2.4322400093078613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,float16,0,0.7093706925710043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,0,1,fp8,fp8,0,2.194154739379883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,fp8,0,0.7170080343882242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,128,1,fp8,fp8,0,0.6465866565704346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,fp8,0,2.427567958831787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,128,1,fp8,fp8,0,0.6376159985860189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,fp8,0,2.4360639254252114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,float16,0,0.4355306625366211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,float16,0,2.42850128809611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,fp8,0,0.44753599166870117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,128,1,fp8,fp8,0,0.4142826795578003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,fp8,0,1.3349653879801433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,float16,0,0.39558398723602295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,0,1,fp8,fp8,0,2.20526393254598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,fp8,0,0.3985546827316284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,float16,0,1.3254986604054768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,float16,0,1.2757279872894287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,128,1,fp8,fp8,0,0.3631360133488973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,float16,0,0.3963306744893392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,0,1,fp8,fp8,0,1.2187360127766926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,0,1,fp8,fp8,0,1.1637760003407795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,fp8,0,0.4005333185195923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,128,1,fp8,fp8,0,0.3673866589864095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,float16,0,1.28110933303833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,float16,0,0.40200531482696533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,fp8,0,1.2855093479156494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,0,1,fp8,fp8,0,1.1666879653930664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,fp8,0,0.40607468287150067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,128,1,fp8,fp8,0,0.36950401465098065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,fp8,0,1.279642661412557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,float16,0,1.288042704264323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,float16,0,0.30848532915115356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,fp8,0,1.290767987569173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,fp8,0,0.30903999010721844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,128,1,fp8,fp8,0,0.2878506580988566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,fp8,0,0.7780693372090658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,0,1,fp8,fp8,0,0.7128480275472006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,float16,0,0.30672534306844074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,fp8,0,0.3081226746241252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,float16,0,0.7731040318806967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,128,1,fp8,fp8,0,0.2868000070254008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,0,1,fp8,fp8,0,1.170799970626831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,fp8,0,0.7736960252126058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,float16,0,0.775445302327474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,fp8,0,0.30830933650334674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,float16,0,0.780186653137207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,128,1,fp8,fp8,0,0.28649065891901654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,fp8,0,0.7765173117319742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,0,1,fp8,fp8,0,0.7106346289316813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,float16,0,0.3099093238512675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,float16,0,0.7751893202463785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,float16,0,0.3087306618690491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,128,1,fp8,fp8,0,0.2864053249359131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,fp8,0,0.3080959916114807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,fp8,0,0.7749066352844238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,0,1,fp8,fp8,0,0.7123520374298096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,0,1,fp8,fp8,0,0.7119253476460775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,float16,0,1.9245440165201824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,fp8,0,1.9378293355305989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,128,1,fp8,fp8,0,1.722442626953125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,float16,0,5.608373641967773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,float16,0,1.9394772847493489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,fp8,0,5.623039881388347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,0,1,fp8,fp8,0,5.065840085347493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,128,1,fp8,fp8,0,1.738848050435384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,float16,0,5.633562723795573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,fp8,0,1.9547573725382488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,float16,0,1.96342929204305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,fp8,0,5.6408640543619795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,0,1,fp8,fp8,0,5.082640012105306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,fp8,0,1.9797760645548503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,128,1,fp8,fp8,0,1.7688105901082356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,float16,0,5.651754379272461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,float16,0,1.1206560134887695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,fp8,0,1.141162633895874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,128,1,fp8,fp8,0,1.0407946904500325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,0,1,fp8,fp8,0,5.115584055582683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,fp8,0,5.67569096883138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,float16,0,0.9892426331837972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,fp8,0,3.0227626164754233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,0,1,fp8,fp8,0,2.7357705434163413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,fp8,0,1.0004159609476726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,128,1,fp8,fp8,0,0.8898560206095377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,float16,0,2.850341478983561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,float16,0,0.9954666296641032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,float16,0,2.995727856953939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,0,1,fp8,fp8,0,2.5793174107869468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,fp8,0,1.0041866302490234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,128,1,fp8,fp8,0,0.8964853286743164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,float16,0,2.8599039713541665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,float16,0,1.0073119799296062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,fp8,0,2.8648160298665366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,fp8,0,1.018671989440918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,float16,0,2.8771254221598306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,128,1,fp8,fp8,0,0.9111839930216471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,0,1,fp8,fp8,0,2.5869654019673667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,float16,0,0.5913920005162557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,fp8,0,2.8874826431274414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,float16,0,1.5510613123575847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,fp8,0,2.8600638707478843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,fp8,0,0.6030666828155518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,128,1,fp8,fp8,0,0.5564213196436564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,float16,0,0.5287733475367228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,0,1,fp8,fp8,0,1.4181760152180989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,0,1,fp8,fp8,0,2.598053296407064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,float16,0,1.4787359237670898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,128,1,fp8,fp8,0,0.48024535179138184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,fp8,0,1.4835999806722004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,fp8,0,1.5645920435587566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,float16,0,0.5333120028177897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,fp8,0,0.5319146712621053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,fp8,0,0.5369066794713339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,128,1,fp8,fp8,0,0.4841013352076213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,fp8,0,1.487727959950765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,float16,0,0.5369386672973633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,float16,0,1.4852053324381511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,float16,0,1.4918880462646484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,fp8,0,0.5425920089085897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,128,1,fp8,fp8,0,0.4905866781870524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,fp8,0,1.4977226257324219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,0,1,fp8,fp8,0,1.3548852602640789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,float16,0,0.33062400420506793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,fp8,0,0.3384639819463094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,float16,0,0.8301973342895508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,128,1,fp8,fp8,0,0.31498666604359943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,0,1,fp8,fp8,0,1.3412639300028484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,fp8,0,0.8382506370544434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,float16,0,0.2983306646347046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,fp8,0,0.3012053370475769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,float16,0,0.7919253508249918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,128,1,fp8,fp8,0,0.2770933310190837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,fp8,0,0.7931199868520101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,0,1,fp8,fp8,0,0.7251413663228353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,float16,0,0.30033065875371295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,0,1,fp8,fp8,0,1.3461227416992188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,fp8,0,0.30204800764719647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,float16,0,0.7953759829203287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,fp8,0,0.7965760231018066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,float16,0,0.3060426712036133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,float16,0,0.8015147050221761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,fp8,0,0.30805333455403644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,0,1,fp8,fp8,0,0.7656853199005127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,128,1,fp8,fp8,0,0.28229333957036334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,0,1,fp8,fp8,0,0.7274773120880127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,fp8,0,0.8035253683725992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,0,1,fp8,fp8,0,0.732863982518514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,float16,0,0.5051680008570353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,fp8,0,0.23484800259272257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,128,1,fp8,fp8,0,0.21808532873789468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,fp8,0,0.5056426525115967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,0,1,fp8,fp8,0,0.4635946750640869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,float16,0,0.23214399814605713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,float16,0,0.4995466470718384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,fp8,0,0.2321173350016276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,128,1,fp8,fp8,0,0.2175146738688151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,fp8,0,0.4989813168843587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,128,1,fp8,fp8,0,0.2781280080477397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,0,1,fp8,fp8,0,0.46056000391642254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,float16,0,0.23210134108861288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,fp8,0,0.23224000136057535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,float16,0,0.2346293330192566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,128,1,fp8,fp8,0,0.21794666846593222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,fp8,0,0.49981868267059326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,0,1,fp8,fp8,0,0.4619679848353068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,float16,0,0.23240000009536743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,float16,0,0.5025759935379028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,fp8,0,0.23417067527770996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,128,1,fp8,fp8,0,0.2177600065867106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,fp8,0,0.5030933221181234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,float16,0,2.564191977183024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,0,1,fp8,fp8,0,0.4614826838175456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,fp8,0,2.5791200002034507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,float16,0,5.647312164306641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,128,1,fp8,fp8,0,2.2934773763020835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,float16,0,0.5011680126190186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,float16,0,2.583594640096029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,0,1,fp8,fp8,0,5.088608105977376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,fp8,0,2.6029440561930337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,float16,0,5.682271957397461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,128,1,fp8,fp8,0,2.3150879542032876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,fp8,0,5.673402786254883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,float16,0,2.6196373303731284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,0,1,fp8,fp8,0,5.1196746826171875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,fp8,0,2.63754669825236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,float16,0,5.724602381388347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,128,1,fp8,fp8,0,2.357754707336426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,fp8,0,5.696016311645508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,float16,0,1.4594240188598633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,fp8,0,1.481770674387614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,float16,0,3.0372320810953775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,fp8,0,5.734581629435222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,0,1,fp8,fp8,0,5.161247889200847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,128,1,fp8,fp8,0,1.3512214024861653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,float16,0,1.279743989308675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,fp8,0,3.061194737752279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,0,1,fp8,fp8,0,2.764416058858236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,fp8,0,1.2910079956054688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,float16,0,2.8399839401245117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,128,1,fp8,fp8,0,1.1434720357259114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,float16,0,1.2893973191579182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,fp8,0,2.8466933568318686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,0,1,fp8,fp8,0,2.5558133125305176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,fp8,0,1.2998186747233074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,float16,0,2.852506637573242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,128,1,fp8,fp8,0,1.1551146507263184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,float16,0,1.3046666781107585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,fp8,0,2.860383987426758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,0,1,fp8,fp8,0,2.567061265309652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,fp8,0,1.3182346820831299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,128,1,fp8,fp8,0,1.1715466976165771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,float16,0,0.7501440048217773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,fp8,0,2.882901191711426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,0,1,fp8,fp8,0,2.5846667289733887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,float16,0,1.5537333488464355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,fp8,0,0.7647253672281901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,128,1,fp8,fp8,0,0.6999839941660563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,float16,0,0.6648106575012207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,fp8,0,1.566677411397298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,fp8,0,0.6710399786631266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,float16,0,1.4591093063354492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,float16,0,2.867664019266764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,fp8,0,1.4653706550598145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,0,1,fp8,fp8,0,1.420965353647868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,float16,0,0.6705919901529948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,float16,0,1.4651039441426594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,128,1,fp8,fp8,0,0.6032693386077881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,0,1,fp8,fp8,0,1.3173600037892659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,fp8,0,1.47053861618042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,0,1,fp8,fp8,0,1.3216533660888672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,fp8,0,0.6765173276265463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,float16,0,0.6771679719289144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,fp8,0,0.697381337483724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,128,1,fp8,fp8,0,0.6124213139216105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,float16,0,1.46943998336792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,float16,0,0.4005333185195923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,fp8,0,1.4789600372314453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,float16,0,0.8114666938781738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,128,1,fp8,fp8,0,0.378762682278951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,fp8,0,0.8225759665171305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,0,1,fp8,fp8,0,1.3316480318705242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,128,1,fp8,fp8,0,0.5987840096155802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,float16,0,0.3575626611709595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,fp8,0,0.35899198055267334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,float16,0,0.7633546988169352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,128,1,fp8,fp8,0,0.32663466533025104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,fp8,0,0.7680373191833496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,0,1,fp8,fp8,0,0.6982506910959879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,float16,0,0.36101865768432617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,fp8,0,0.4102240006128947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,float16,0,0.7698132991790771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,128,1,fp8,fp8,0,0.3286133408546448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,fp8,0,0.7722613016764323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,0,1,fp8,fp8,0,0.700597365697225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,0,1,fp8,fp8,0,0.7491520245869955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,float16,0,0.3636426528294881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,fp8,0,0.36344532171885174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,fp8,0,0.3675626516342163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,float16,0,0.7746559778849283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,128,1,fp8,fp8,0,0.33483731746673584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,float16,0,0.22822399934132895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,0,1,fp8,fp8,0,0.7049973011016846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,float16,0,0.4461013476053874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,128,1,fp8,fp8,0,0.21613866090774536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,fp8,0,0.45229868094126385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,0,1,fp8,fp8,0,0.41517333189646405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,float16,0,0.20326934258143106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,fp8,0,0.7785332997639974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,float16,0,0.41972267627716064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,fp8,0,0.20428800582885742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,128,1,fp8,fp8,0,0.18945600589116415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,fp8,0,0.23300800720850626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,fp8,0,0.42026134332021076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,0,1,fp8,fp8,0,0.3859306573867798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,fp8,0,0.20551466941833496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,128,1,fp8,fp8,0,0.18970666329065958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,fp8,0,0.42108798027038574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,0,1,fp8,fp8,0,0.3882400194803874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,float16,0,0.20777599016825357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,float16,0,0.4243040084838867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,fp8,0,0.20891199509302774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,128,1,fp8,fp8,0,0.19382933775583902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,fp8,0,0.4251519838968913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,0,1,fp8,fp8,0,0.392250657081604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,float16,0,0.4201493263244629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,float16,0,0.2842986583709717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,fp8,0,0.16516266266504923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,128,1,fp8,fp8,0,0.1534293293952942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,fp8,0,0.2858933409055074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,0,1,fp8,fp8,0,0.2637760043144226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,float16,0,0.16129600008328757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,float16,0,0.2815840045611064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,fp8,0,0.16065067052841187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,128,1,fp8,fp8,0,0.1504533290863037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,fp8,0,0.28467732667922974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,0,1,fp8,fp8,0,0.25913067658742267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,float16,0,0.16088533401489258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,float16,0,0.2802986701329549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,fp8,0,0.16270933548609415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,128,1,fp8,fp8,0,0.15104533235232034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,fp8,0,0.28305600086847943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,0,1,fp8,fp8,0,0.26124266783396405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,float16,0,0.16271467010180155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,float16,0,0.2813599904378255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,128,1,fp8,fp8,0,0.15040533741315207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,fp8,0,0.2829119960467021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,0,1,fp8,fp8,0,0.25885866085688275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,fp8,0,0.16240533192952475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,float16,0,1.8991626103719075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,float16,0,0.20521599054336548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,fp8,0,1.9137333234151204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,128,1,fp8,fp8,0,1.691856066385905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,float16,0,3.5381867090861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,float16,0,0.16457066933314005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,float16,0,1.9131840070088704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,fp8,0,3.559727986653646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,fp8,0,1.9269919395446777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,128,1,fp8,fp8,0,1.7108532587687175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,0,1,fp8,fp8,0,3.1830771764119468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,float16,0,3.5569705963134766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,fp8,0,3.5698188145955405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,float16,0,1.9383093516031902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,128,1,fp8,fp8,0,1.7381866772969563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,0,1,fp8,fp8,0,3.201888084411621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,float16,0,3.585536003112793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,float16,0,1.0958240032196045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,fp8,0,1.9544906616210938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,fp8,0,3.606207847595215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,0,1,fp8,fp8,0,3.2314399083455405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,float16,0,1.9377652804056804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,128,1,fp8,fp8,0,1.013050635655721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,float16,0,0.9646666844685873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,fp8,0,1.9573760032653809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,0,1,fp8,fp8,0,1.768463929494222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,fp8,0,0.9736746946970621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,128,1,fp8,fp8,0,0.8608427047729492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,fp8,0,1.1135413646697998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,float16,0,0.9699892997741699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,0,1,fp8,fp8,0,1.612928072611491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,fp8,0,0.9789600372314453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,float16,0,1.800869305928548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,float16,0,1.7976586023966472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,128,1,fp8,fp8,0,0.8679893016815186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,fp8,0,1.806890646616618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,float16,0,0.9806133111317953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,fp8,0,0.9912532965342203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,float16,0,1.818405310312907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,128,1,fp8,fp8,0,0.8814666271209717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,0,1,fp8,fp8,0,1.6203625996907551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,fp8,0,1.830517292022705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,0,1,fp8,fp8,0,1.6344159444173176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,float16,0,0.998965342839559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,fp8,0,0.5788266658782959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,128,1,fp8,fp8,0,0.5295573472976685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,fp8,0,1.809930642445882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,fp8,0,1.009050687154134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,float16,0,0.5679466724395752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,0,1,fp8,fp8,0,0.9171253045399984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,float16,0,0.5026559829711914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,float16,0,0.927344004313151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,fp8,0,0.932090679804484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,0,1,fp8,fp8,0,0.8516799608866373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,float16,0,0.5064853429794312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,128,1,fp8,fp8,0,0.45366398493448895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,fp8,0,0.5107680161794027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,128,1,fp8,fp8,0,0.45708266894022626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,fp8,0,0.935157299041748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,float16,0,0.5142186482747396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,fp8,0,0.5083306630452474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,float16,0,0.9390133221944174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,fp8,0,0.517898678779602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,128,1,fp8,fp8,0,0.4643413225809733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,0,1,fp8,fp8,0,0.8429493109385172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,0,1,fp8,fp8,0,0.8491360346476237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,float16,0,0.5288480122884115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,fp8,0,0.3121440013249715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,float16,0,0.9311626752217611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,128,1,fp8,fp8,0,0.28780800104141235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,fp8,0,0.5354400078455607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,0,1,fp8,fp8,0,0.4883626699447632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,float16,0,0.26927467187245685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,float16,0,0.30557866891225177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,float16,0,0.48975467681884766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,fp8,0,0.2725759943326314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,128,1,fp8,fp8,0,0.24865599473317465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,fp8,0,0.4931946595509847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,float16,0,0.2717760006586711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,fp8,0,0.2742933432261149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,128,1,fp8,fp8,0,0.2525493303934733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,fp8,0,0.944165309270223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,fp8,0,0.4984639883041382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,0,1,fp8,fp8,0,0.45156268278757733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,float16,0,0.27767467498779297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,fp8,0,0.2800053358078003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,0,1,fp8,fp8,0,0.4503573179244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,128,1,fp8,fp8,0,0.2552693287531535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,float16,0,0.4939519961675008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,fp8,0,0.5014133453369141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,float16,0,0.17501866817474365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,float16,0,0.295909325281779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,128,1,fp8,fp8,0,0.16681599617004395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,fp8,0,0.29821866750717163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,float16,0,0.49901866912841797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,float16,0,0.15452266732851663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,float16,0,0.2755253314971924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,0,1,fp8,fp8,0,0.45550934473673504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,fp8,0,0.1544373333454132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,fp8,0,0.17868266503016153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,fp8,0,0.28008000055948895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,0,1,fp8,fp8,0,0.2521226604779561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,float16,0,0.15482133626937866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,float16,0,0.2757866581281026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,fp8,0,0.15658133228619894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,128,1,fp8,fp8,0,0.14313600460688272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,fp8,0,0.2774239977200826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,0,1,fp8,fp8,0,0.25286932786305744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,float16,0,0.15478932857513428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,128,1,fp8,fp8,0,0.14054933190345764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,128,1,fp8,fp8,0,0.14808000127474466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,fp8,0,0.2789226571718852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,0,1,fp8,fp8,0,0.2773386637369792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,float16,0,0.12361600001653035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,float16,0,0.19501332441965738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,float16,0,0.27663467327753705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,fp8,0,0.12229866782824199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,128,1,fp8,fp8,0,0.11752532919247945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,fp8,0,0.19532267252604166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,0,1,fp8,fp8,0,0.18316799402236938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,float16,0,0.12166399757067363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,float16,0,0.1939679980278015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,fp8,0,0.12175466616948445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,128,1,fp8,fp8,0,0.11552000045776367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,0,1,fp8,fp8,0,0.2571573257446289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,0,1,fp8,fp8,0,0.1790613333384196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,float16,0,0.12371200323104858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,float16,0,0.19523733854293823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,fp8,0,0.12371733784675598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,128,1,fp8,fp8,0,0.1156213382879893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,fp8,0,0.19401599963506064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,0,1,fp8,fp8,0,0.18042133251825967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,float16,0,0.1216319998105367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,float16,0,0.1945013403892517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,fp8,0,0.12356266379356384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,128,1,fp8,fp8,0,0.11537599563598633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,fp8,0,0.19542400042215982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,0,1,fp8,fp8,0,0.1792373259862264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,fp8,0,0.19446933269500732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,fp8,0,0.15680000185966492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,float16,0,2.589903990427653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,fp8,0,2.5866400400797525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,float16,0,3.8785972595214844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,128,1,fp8,fp8,0,2.2505812644958496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,fp8,0,3.8703734079996743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,float16,0,2.6017120679219565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,fp8,0,2.598202705383301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,0,1,fp8,fp8,0,3.414213180541992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,float16,0,3.886576016743978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,128,1,fp8,fp8,0,2.2739200592041016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,fp8,0,3.893365224202474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,0,1,fp8,fp8,0,3.4320799509684243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,float16,0,2.6988159815470376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,float16,0,3.999631881713867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,128,1,fp8,fp8,0,2.308490594228109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,fp8,0,2.689743995666504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,float16,0,1.4470399220784504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,fp8,0,3.9801546732584634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,float16,0,2.109605312347412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,0,1,fp8,fp8,0,3.4767414728800454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,fp8,0,1.460367997487386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,128,1,fp8,fp8,0,1.3339145978291829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,fp8,0,2.1241599718729653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,float16,0,1.2635040283203125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,0,1,fp8,fp8,0,1.9257814089457195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,float16,0,1.9117652575174968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,fp8,0,1.2752052942911785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,fp8,0,1.9235040346781414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,0,1,fp8,fp8,0,1.7109813690185547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,float16,0,1.2769866784413655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,fp8,0,1.28329070409139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,float16,0,1.9258507092793782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,128,1,fp8,fp8,0,1.1226027011871338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,float16,0,1.2893439928690593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,0,1,fp8,fp8,0,1.7231839497884114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,128,1,fp8,fp8,0,1.1357119878133137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,fp8,0,1.301205317179362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,float16,0,1.9403786659240723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,128,1,fp8,fp8,0,1.1532906691233318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,fp8,0,1.933242638905843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,float16,0,0.7370826403299967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,fp8,0,1.9536159833272297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,0,1,fp8,fp8,0,1.738368034362793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,float16,0,1.0736533006032307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,fp8,0,1.083733320236206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,0,1,fp8,fp8,0,0.9835093021392822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,float16,0,0.6492426792780558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,fp8,0,0.6537599960962931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,128,1,fp8,fp8,0,0.5793813467025757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,fp8,0,0.9840799967447916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,0,1,fp8,fp8,0,0.877941370010376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,float16,0,0.9792959690093994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,float16,0,0.6533066829045614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,fp8,0,0.7487200101216634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,float16,0,0.9842346509297689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,fp8,0,0.6589759985605875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,fp8,0,0.988970677057902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,0,1,fp8,fp8,0,0.882917324701945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,float16,0,0.6606613397598267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,float16,0,0.9917973677317301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,128,1,fp8,fp8,0,0.6811412970225016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,128,1,fp8,fp8,0,0.5935519933700562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,128,1,fp8,fp8,0,0.5853813489278158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,fp8,0,0.9966346422831217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,float16,0,0.3851146697998047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,0,1,fp8,fp8,0,0.8918186823527018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,float16,0,0.5591200192769369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,128,1,fp8,fp8,0,0.3594506581624349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,fp8,0,0.5664746761322021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,0,1,fp8,fp8,0,0.5157226721445719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,fp8,0,0.667365312576294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,float16,0,0.3386506636937459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,float16,0,0.5089173316955566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,128,1,fp8,fp8,0,0.31007999181747437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,fp8,0,0.5130026737848917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,0,1,fp8,fp8,0,0.46399466196695965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,float16,0,0.3420906861623128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,fp8,0,0.3920480012893677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,float16,0,0.5131253401438395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,fp8,0,0.34514665603637695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,128,1,fp8,fp8,0,0.31059734026590985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,fp8,0,0.5154186487197876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,0,1,fp8,fp8,0,0.4659413496653239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,float16,0,0.5179839928944906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,fp8,0,0.35126399993896484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,128,1,fp8,fp8,0,0.31620800495147705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,fp8,0,0.5211199919382731
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,0,1,fp8,fp8,0,0.4824053446451823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,float16,0,0.30005866289138794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,fp8,0,0.21379733085632324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,float16,0,0.3475253184636434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,fp8,0,0.30612266063690186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,0,1,fp8,fp8,0,0.28145066897074383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,float16,0,0.1811093290646871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,float16,0,0.20913066466649374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,float16,0,0.2711946765581767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,fp8,0,0.18327999114990234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,128,1,fp8,fp8,0,0.16993600130081177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,fp8,0,0.273909330368042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,0,1,fp8,fp8,0,0.253221333026886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,128,1,fp8,fp8,0,0.1993173360824585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,float16,0,0.27423999706904095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,fp8,0,0.3421279986699422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,128,1,fp8,fp8,0,0.17126933733622232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,fp8,0,0.27587199211120605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,0,1,fp8,fp8,0,0.25308799743652344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,float16,0,0.18699200948079428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,float16,0,0.2760319908459981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,fp8,0,0.19009600083033243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,128,1,fp8,fp8,0,0.17722666263580322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,fp8,0,0.2797333399454753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,0,1,fp8,fp8,0,0.2594933311144511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,float16,0,0.12202133735020955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,float16,0,0.1732106606165568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,fp8,0,0.12410133083661397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,float16,0,0.18358399470647177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,fp8,0,0.18641066551208496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,0,1,fp8,fp8,0,0.16481600205103555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,float16,0,0.11032000184059143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,float16,0,0.16119466225306192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,fp8,0,0.10921066999435425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,128,1,fp8,fp8,0,0.09935466448465984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,fp8,0,0.1606613298257192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,0,1,fp8,fp8,0,0.14486933747927347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,float16,0,0.10958932836850484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,float16,0,0.16024000446001688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,128,1,fp8,fp8,0,0.1183519959449768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,fp8,0,0.1770026683807373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,0,1,fp8,fp8,0,0.14628799756368002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,float16,0,0.10989866654078166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,float16,0,0.1605226695537567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,fp8,0,0.10965333382288615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,128,1,fp8,fp8,0,0.10120532910029094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,fp8,0,0.10964799920717876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,128,1,fp8,fp8,0,0.09964266419410706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,float16,0,0.08688533306121826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,float16,0,0.12004266182581584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,fp8,0,0.08684800068537395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,128,1,fp8,fp8,0,0.08293333152929942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,fp8,0,0.1218239963054657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,0,1,fp8,fp8,0,0.11157866319020589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,float16,0,0.08847999572753906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,fp8,0,0.1613759994506836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,float16,0,0.12166399757067363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,0,1,fp8,fp8,0,0.1479146679242452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,128,1,fp8,fp8,0,0.08272000153859456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,fp8,0,0.12158933281898499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,0,1,fp8,fp8,0,0.11239999532699585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,float16,0,0.08897067109743755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,float16,0,0.12171199917793274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,fp8,0,0.08892266949017842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,128,1,fp8,fp8,0,0.082805335521698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,fp8,0,0.12105600039164226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,0,1,fp8,fp8,0,0.1116426686445872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,float16,0,0.08793600400288899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,float16,0,0.11987200379371643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,fp8,0,0.0890773336092631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,128,1,fp8,fp8,0,0.08283733328183492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,fp8,0,0.12153066198031108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,0,1,fp8,fp8,0,0.1113813320795695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,fp8,0,0.0890773336092631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,fp8,0,0.16200533509254456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,float16,0,1.9064319928487141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,fp8,0,1.9122239748636882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,float16,0,2.544895966847738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,fp8,0,2.546127955118815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,0,1,fp8,fp8,0,2.24619197845459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,128,1,fp8,fp8,0,1.6772267023722331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,fp8,0,1.9238559405008953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,float16,0,2.5612053871154785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,128,1,fp8,fp8,0,1.6912852923075359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,float16,0,1.924400011698405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,fp8,0,2.5581653912862143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,float16,0,1.987328052520752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,float16,0,2.625434716542562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,128,1,fp8,fp8,0,1.7204373677571614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,0,1,fp8,fp8,0,2.264154593149821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,fp8,0,2.594517389933268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,0,1,fp8,fp8,0,2.2896107037862143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,fp8,0,1.9562133153279622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,fp8,0,1.1000266869862874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,128,1,fp8,fp8,0,0.9991839726765951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,fp8,0,1.429418722788493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,float16,0,1.0891733169555664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,0,1,fp8,fp8,0,1.2933759689331055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,float16,0,0.9496213595072428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,float16,0,1.2712000211079915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,128,1,fp8,fp8,0,0.8463253180185953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,fp8,0,1.2798133691151936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,0,1,fp8,fp8,0,1.1342559655507405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,float16,0,1.418058713277181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,float16,0,1.2778613567352295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,fp8,0,0.9644426504770914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,128,1,fp8,fp8,0,0.8531839847564697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,fp8,0,1.285434643427531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,0,1,fp8,fp8,0,1.1419466336568196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,float16,0,0.957477331161499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,fp8,0,0.9575093587239584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,fp8,0,0.97762664159139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,128,1,fp8,fp8,0,0.8653759956359863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,fp8,0,1.3020106951395671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,0,1,fp8,fp8,0,1.1560373306274414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,float16,0,0.5707626740137736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,float16,0,0.9693013032277426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,float16,0,0.7243253389994303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,fp8,0,0.5642879803975424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,float16,0,1.2922720114390056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,128,1,fp8,fp8,0,0.5154773394266764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,fp8,0,0.7348266442616781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,0,1,fp8,fp8,0,0.6648053328196207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,float16,0,0.4888799985249837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,float16,0,0.6544853448867798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,fp8,0,0.6573760112126669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,0,1,fp8,fp8,0,0.6015093326568604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,float16,0,0.4927519957224528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,fp8,0,0.4920373360315959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,float16,0,0.6585119962692261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,fp8,0,0.49664000670115155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,128,1,fp8,fp8,0,0.4557120005289714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,fp8,0,0.6620693206787109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,float16,0,0.6647359927495321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,128,1,fp8,fp8,0,0.4394826491673787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,fp8,0,0.5034720102945963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,128,1,fp8,fp8,0,0.4497866630554199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,fp8,0,0.6684319972991943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,0,1,fp8,fp8,0,0.5968799988428751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,float16,0,0.49907199541727704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,float16,0,0.3808693488438924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,128,1,fp8,fp8,0,0.2744106650352478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,fp8,0,0.3870240052541097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,0,1,fp8,fp8,0,0.352730671564738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,float16,0,0.25403199593226117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,float16,0,0.29256532589594525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,0,1,fp8,fp8,0,0.5917706489562988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,float16,0,0.3404586712519328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,fp8,0,0.25707733631134033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,fp8,0,0.2980746626853943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,fp8,0,0.3433866500854492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,0,1,fp8,fp8,0,0.3125600020090739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,float16,0,0.3445119857788086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,128,1,fp8,fp8,0,0.2387253244717916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,fp8,0,0.35336001714070636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,0,1,fp8,fp8,0,0.31600000460942584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,float16,0,0.26180799802144367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,128,1,fp8,fp8,0,0.23534399271011353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,float16,0,0.3487679958343506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,fp8,0,0.260042667388916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,fp8,0,0.3514933188756307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,0,1,fp8,fp8,0,0.3200160066286723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,float16,0,0.160970667997996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,float16,0,0.21056000391642252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,fp8,0,0.16479466358820596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,float16,0,0.25723199049631756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,128,1,fp8,fp8,0,0.15457600355148315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,fp8,0,0.21211733420689902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,0,1,fp8,fp8,0,0.19591999053955078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,float16,0,0.13645866513252258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,float16,0,0.18359466393788657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,fp8,0,0.1383039951324463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,128,1,fp8,fp8,0,0.13075199723243713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,fp8,0,0.1860640048980713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,0,1,fp8,fp8,0,0.16938134034474692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,float16,0,0.13820266723632812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,float16,0,0.18583466609319052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,fp8,0,0.14059199889500937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,128,1,fp8,fp8,0,0.1293706695238749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,fp8,0,0.18725866079330444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,fp8,0,0.2656373381614685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,float16,0,0.13929599523544312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,128,1,fp8,fp8,0,0.24229333798090616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,fp8,0,0.1423466702302297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,128,1,fp8,fp8,0,0.13390933473904928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,fp8,0,0.18953599532445273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,0,1,fp8,fp8,0,0.1771893302599589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,float16,0,0.0906773308912913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,float16,0,0.11935999989509583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,fp8,0,0.092031995455424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,128,1,fp8,fp8,0,0.09090666969617207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,fp8,0,0.11998933553695679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,0,1,fp8,fp8,0,0.11570666233698527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,float16,0,0.08472533027331035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,float16,0,0.1120799978574117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,fp8,0,0.08520533641179402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,0,1,fp8,fp8,0,0.17122133572896323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,fp8,0,0.11353066563606262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,0,1,fp8,fp8,0,0.10262399911880493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,float16,0,0.08427733182907104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,float16,0,0.1120693286259969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,fp8,0,0.08478933572769165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,128,1,fp8,fp8,0,0.0784853349129359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,fp8,0,0.11358400185902913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,0,1,fp8,fp8,0,0.10338667035102844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,float16,0,0.0849173367023468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,float16,0,0.11322133739789327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,fp8,0,0.08496000369389851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,128,1,fp8,fp8,0,0.07879466811815898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,fp8,0,0.11355732878049214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,0,1,fp8,fp8,0,0.10345066587130229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,float16,0,0.06855999926726024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,float16,0,0.08885332942008972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,fp8,0,0.06828799843788147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,128,1,fp8,fp8,0,0.06428266565004985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,fp8,0,0.08913066983222961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,0,1,fp8,fp8,0,0.08283733328183492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,float16,0,0.0687360018491745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,float16,0,0.08910399675369263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,fp8,0,0.06817600131034851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,128,1,fp8,fp8,0,0.06435200075308482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,fp8,0,0.08899733424186707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,0,1,fp8,fp8,0,0.08273600041866302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,128,1,fp8,fp8,0,0.07699733475844066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,float16,0,0.06855999926726024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,float16,0,0.18755199511845908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,fp8,0,0.06862933437029521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,128,1,fp8,fp8,0,0.06487466891606648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,0,1,fp8,fp8,0,0.08353599905967712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,float16,0,0.06835199892520905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,float16,0,0.0895146628220876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,fp8,0,0.06745600203673045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,128,1,fp8,fp8,0,0.06467199822266896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,fp8,0,0.08909866213798523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,0,1,fp8,fp8,0,0.08294933537642162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,fp8,0,0.08923733234405518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,float16,0,2.2399999300638833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,fp8,0,2.23742405573527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,float16,0,2.6596959431966147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,128,1,fp8,fp8,0,2.124015967051188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,fp8,0,2.64955202738444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,0,1,fp8,fp8,0,2.499791940053304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,float16,0,2.249839941660563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,float16,0,2.659663995107015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,float16,0,0.08890133102734883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,128,1,fp8,fp8,0,2.1825386683146157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,fp8,0,2.6515092849731445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,fp8,0,2.2481120427449546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,0,1,fp8,fp8,0,2.5605759620666504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,float16,0,2.263594627380371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,float16,0,2.678346633911133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,128,1,fp8,fp8,0,2.145962715148926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,fp8,0,2.6633386611938477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,float16,0,1.2264320055643718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,0,1,fp8,fp8,0,2.5568693478902182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,fp8,0,2.2708959579467773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,128,1,fp8,fp8,0,1.2040053208669026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,fp8,0,1.4197813669840496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,0,1,fp8,fp8,0,1.3922559420267742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,float16,0,1.1317013104756672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,fp8,0,1.2021866639455159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,float16,0,1.3412906328837078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,fp8,0,1.13100798924764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,fp8,0,1.3409973780314128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,0,1,fp8,fp8,0,1.214303970336914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,float16,0,1.1318346659342449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,float16,0,1.3399465878804524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,128,1,fp8,fp8,0,1.0309600035349529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,fp8,0,1.1311360200246174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,128,1,fp8,fp8,0,1.0390079816182454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,float16,0,1.4582026799519856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,fp8,0,1.3387999534606934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,0,1,fp8,fp8,0,1.2361760139465332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,float16,0,1.142085313796997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,float16,0,1.349679946899414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,128,1,fp8,fp8,0,1.0522879759470622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,fp8,0,1.3481067021687825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,0,1,fp8,fp8,0,1.2455253601074219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,float16,0,0.7372693220774332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,fp8,0,0.6303679943084717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,128,1,fp8,fp8,0,0.5960533221562704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,fp8,0,1.135637362798055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,fp8,0,0.7245439688364664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,0,1,fp8,fp8,0,0.6968639691670736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,float16,0,0.681050697962443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,float16,0,0.6239360173543295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,fp8,0,0.5733760197957357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,128,1,fp8,fp8,0,0.5239893198013306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,fp8,0,0.6804959774017334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,float16,0,0.598202665646871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,float16,0,0.6816480159759521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,fp8,0,0.5751306613286337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,float16,0,0.5744053522745768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,fp8,0,0.6823519865671793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,0,1,fp8,fp8,0,0.6188799937566122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,float16,0,0.6024906635284424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,0,1,fp8,fp8,0,0.6184693177541097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,float16,0,0.6894346872965494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,fp8,0,0.5797813336054484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,128,1,fp8,fp8,0,0.5358613332112631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,fp8,0,0.7134613196055094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,0,1,fp8,fp8,0,0.6306666533152262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,128,1,fp8,fp8,0,0.5276533365249634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,float16,0,0.3826560179392497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,fp8,0,0.3194933334986369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,128,1,fp8,fp8,0,0.3089653253555298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,fp8,0,0.3772053321202596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,float16,0,0.29738134145736694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,float16,0,0.35493866602579754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,fp8,0,0.29606932401657104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,128,1,fp8,fp8,0,0.27372799317042035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,fp8,0,0.35154132048288983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,0,1,fp8,fp8,0,0.3223839998245239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,float16,0,0.29716267188390094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,float16,0,0.3554133176803589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,fp8,0,0.29815467198689777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,128,1,fp8,fp8,0,0.2737119992574056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,0,1,fp8,fp8,0,0.35813331604003906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,0,1,fp8,fp8,0,0.32260799407958984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,float16,0,0.303056001663208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,float16,0,0.3583466609319051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,float16,0,0.32567999760309857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,fp8,0,0.3015999992688497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,128,1,fp8,fp8,0,0.27874133984247845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,fp8,0,0.3568426767985026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,0,1,fp8,fp8,0,0.32732800642649335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,float16,0,0.17173333962758383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,float16,0,0.20197866360346475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,fp8,0,0.16933866341908774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,128,1,fp8,fp8,0,0.16501333316167197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,fp8,0,0.20003199577331543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,0,1,fp8,fp8,0,0.19363733132680258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,float16,0,0.15710933009783426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,float16,0,0.18729066848754883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,fp8,0,0.15712533394495645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,128,1,fp8,fp8,0,0.14643733700116476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,fp8,0,0.18688533703486124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,0,1,fp8,fp8,0,0.1732906699180603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,float16,0,0.1566986640294393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,float16,0,0.1868213415145874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,fp8,0,0.156960000594457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,fp8,0,0.1872373421986898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,0,1,fp8,fp8,0,0.1732800006866455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,float16,0,0.15944533546765646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,float16,0,0.18941867351531982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,fp8,0,0.15922666589419046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,128,1,fp8,fp8,0,0.15032000343004862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,fp8,0,0.18997865915298462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,0,1,fp8,fp8,0,0.17706133921941122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,float16,0,0.09778666496276855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,float16,0,0.11360533038775127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,fp8,0,0.0956053336461385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,128,1,fp8,fp8,0,0.09558399518330891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,fp8,0,0.364954670270284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,0,1,fp8,fp8,0,0.11166399717330933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,float16,0,0.08691199620564778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,float16,0,0.10531199971834819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,fp8,0,0.0874079962571462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,128,1,fp8,fp8,0,0.08069866895675659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,fp8,0,0.10518933335940044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,0,1,fp8,fp8,0,0.09667733311653137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,float16,0,0.08891200025876363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,float16,0,0.10557867089907329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,fp8,0,0.08841066559155782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,128,1,fp8,fp8,0,0.07851733267307281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,fp8,0,0.10531199971834819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,0,1,fp8,fp8,0,0.09562666217486064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,float16,0,0.08929600318272908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,float16,0,0.10616532961527507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,fp8,0,0.08763733506202698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,128,1,fp8,fp8,0,0.0812960018714269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,fp8,0,0.10552000006039937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,0,1,fp8,fp8,0,0.09718933701515198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,float16,0,0.055498664577802025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,float16,0,0.06642666459083557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,128,1,fp8,fp8,0,0.053727999329566956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,fp8,0,0.0653653343518575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,0,1,fp8,fp8,0,0.0625600020090739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,float16,0,0.05379199981689453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,float16,0,0.06451733410358429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,fp8,0,0.11373866597811381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,128,1,fp8,fp8,0,0.05012266834576925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,fp8,0,0.0650133341550827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,fp8,0,0.054229333996772766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,float16,0,0.053823997577031456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,float16,0,0.06518400212128957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,fp8,0,0.05359466870625814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,128,1,fp8,fp8,0,0.04961066444714864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,fp8,0,0.06576000154018402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,fp8,0,0.05259733398755392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,128,1,fp8,fp8,0,0.14688000082969666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,float16,0,0.06438399851322174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,0,1,fp8,fp8,0,0.06027733286221822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,fp8,0,0.054474666714668274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,128,1,fp8,fp8,0,0.05106133222579956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,fp8,0,0.06525333225727081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,0,1,fp8,fp8,0,0.06005333364009857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,float16,0,0.04051200052102407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,float16,0,0.0459199994802475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,fp8,0,0.04049066702524821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,128,1,fp8,fp8,0,0.0383093332250913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,fp8,0,0.047914668917655945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,0,1,fp8,fp8,0,0.04489600161711375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,0,1,fp8,fp8,0,0.06005866825580597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,float16,0,0.04491200049718221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,float16,0,0.05418666700522105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,128,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,fp8,0,0.04426133135954539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,0,1,fp8,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,float16,0,0.037471999724706016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,float16,0,0.04510400195916494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,fp8,0,0.03808533400297165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,128,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,fp8,0,0.0461760014295578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,0,1,fp8,fp8,0,0.043061330914497375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,float16,0,0.03921066721280416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,float16,0,0.04574400186538696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,fp8,0,0.03896533449490865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,128,1,fp8,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,fp8,0,0.037903999288876854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,128,1,float16,float16,0,2.1783679326375327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,float16,0,0.03782399992148081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,0,1,fp8,fp8,0,0.04200533529122671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,0,1,float16,float16,0,2.2119040489196777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,128,1,float16,fp8,0,2.1671679814656577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,128,1,fp8,fp8,0,2.0852160453796387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,0,1,float16,fp8,0,2.2122400601704917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,0,1,fp8,fp8,0,2.106405258178711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,128,1,float16,float16,0,2.187376022338867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,0,1,float16,float16,0,2.2130613327026367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,128,1,float16,fp8,0,2.1719840367635093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,128,1,fp8,fp8,0,2.1279892921447754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,0,1,float16,fp8,0,2.21891196568807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,128,1,float16,float16,0,2.199402650197347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,0,1,float16,float16,0,2.278864065806071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,0,1,fp8,fp8,0,2.1347039540608725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,128,1,fp8,fp8,0,2.1252106030782065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,0,1,float16,fp8,0,2.2303519248962402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,0,1,fp8,fp8,0,2.1177333196004233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,128,1,float16,float16,0,1.1976799964904785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,0,1,float16,float16,0,1.2237119674682617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,128,1,float16,fp8,0,1.1731839974721272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,128,1,fp8,fp8,0,1.162618637084961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,0,1,float16,fp8,0,1.204426685969035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,0,1,fp8,fp8,0,1.1715946992238362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,128,1,float16,fp8,0,2.204832077026367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,128,1,float16,float16,0,1.0967733065287273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,0,1,float16,float16,0,1.1163573265075684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,128,1,float16,fp8,0,1.0946666399637859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,128,1,fp8,fp8,0,0.9978506565093994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,0,1,float16,fp8,0,1.1119413375854492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,0,1,fp8,fp8,0,1.0082933108011882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,128,1,float16,float16,0,1.0997440020243328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,0,1,float16,float16,0,1.114858627319336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,128,1,float16,fp8,0,1.0965546766916912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,128,1,fp8,fp8,0,1.0410613218943279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,0,1,float16,fp8,0,1.1168159643809001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,128,1,float16,float16,0,1.1059733231862385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,0,1,float16,float16,0,1.1269386609395344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,128,1,float16,fp8,0,1.10371732711792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,128,1,fp8,fp8,0,1.0365280310312908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,0,1,float16,fp8,0,1.123855988184611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,128,1,float16,float16,0,0.605840007464091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,0,1,fp8,fp8,0,1.051466703414917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,0,1,float16,float16,0,0.6212746699651083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,0,1,fp8,fp8,0,1.0568160216013591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,128,1,float16,fp8,0,0.5978560050328573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,128,1,fp8,fp8,0,0.5796000162760416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,0,1,float16,fp8,0,0.6103786627451578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,0,1,fp8,fp8,0,0.590336004892985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,0,1,float16,float16,0,0.5890346765518188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,128,1,float16,fp8,0,0.5559786558151245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,128,1,fp8,fp8,0,0.5076586802800497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,0,1,float16,fp8,0,0.5656799872716268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,0,1,fp8,fp8,0,0.513808012008667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,128,1,float16,float16,0,0.5847146511077881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,0,1,float16,float16,0,0.5706719954808553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,128,1,float16,fp8,0,0.5591946840286255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,128,1,fp8,fp8,0,0.5096480051676432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,0,1,float16,fp8,0,0.5886826515197754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,0,1,fp8,fp8,0,0.5168426831563314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,128,1,float16,float16,0,0.5755199988683065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,0,1,float16,float16,0,0.5730933348337809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,128,1,float16,fp8,0,0.5612159967422485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,128,1,fp8,fp8,0,0.5183146794637045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,0,1,float16,fp8,0,0.5726666847864786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,128,1,float16,float16,0,0.31646933158238727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,0,1,float16,float16,0,0.3295946717262268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,128,1,float16,fp8,0,0.31029866139094037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,128,1,fp8,fp8,0,0.30032533407211304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,0,1,float16,fp8,0,0.3163626591364543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,0,1,fp8,fp8,0,0.30403733253479004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,128,1,float16,float16,0,0.288975993792216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,0,1,float16,float16,0,0.2940853238105774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,128,1,float16,fp8,0,0.28784000873565674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,128,1,fp8,fp8,0,0.26470400889714557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,0,1,float16,fp8,0,0.29312533140182495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,0,1,fp8,fp8,0,0.525002678235372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,128,1,float16,float16,0,0.28869332869847614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,0,1,float16,float16,0,0.2949120004971822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,128,1,float16,fp8,0,0.28838932514190674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,128,1,fp8,fp8,0,0.264789342880249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,0,1,float16,fp8,0,0.2942240039507548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,0,1,fp8,fp8,0,0.2693973382314046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,128,1,float16,float16,0,0.5635093450546265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,0,1,float16,float16,0,0.3132266600926717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,128,1,float16,fp8,0,0.29181333382924396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,128,1,fp8,fp8,0,0.2721760074297587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,0,1,float16,fp8,0,0.2970079978307088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,0,1,fp8,fp8,0,0.2750239968299866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,128,1,float16,float16,0,0.16757865746816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,0,1,float16,float16,0,0.17117865880330405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,128,1,float16,fp8,0,0.16568000117937723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,128,1,fp8,fp8,0,0.16190933187802634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,0,1,float16,fp8,0,0.1693920095761617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,0,1,fp8,fp8,0,0.1723733345667521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,128,1,float16,float16,0,0.15216533342997232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,0,1,float16,float16,0,0.1548960010210673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,128,1,float16,fp8,0,0.15242133537928262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,128,1,fp8,fp8,0,0.14269333084424338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,0,1,float16,fp8,0,0.15542933344841003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,0,1,fp8,fp8,0,0.2672853271166484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,0,1,fp8,fp8,0,0.1442346672217051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,128,1,float16,float16,0,0.15332266688346863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,0,1,float16,float16,0,0.15561599532763162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,128,1,float16,fp8,0,0.15294399857521057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,128,1,fp8,fp8,0,0.14225600163141885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,0,1,float16,fp8,0,0.15532799561818442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,0,1,fp8,fp8,0,0.14359466234842935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,128,1,float16,float16,0,0.1551359991232554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,0,1,float16,float16,0,0.1572533349196116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,128,1,float16,fp8,0,0.1548960010210673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,128,1,fp8,fp8,0,0.14638400077819824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,0,1,float16,fp8,0,0.15875200430552164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,0,1,fp8,fp8,0,0.14798933267593384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,128,1,float16,float16,0,0.09508267045021057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,0,1,float16,float16,0,0.0965173343817393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,128,1,float16,fp8,0,0.09311466415723164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,128,1,fp8,fp8,0,0.09515733520189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,0,1,float16,fp8,0,0.09547199805577596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,0,1,fp8,fp8,0,0.09498666723569234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,128,1,float16,float16,0,0.0851146678129832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,0,1,float16,float16,0,0.08618666728337605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,128,1,float16,fp8,0,0.08541333675384521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,128,1,fp8,fp8,0,0.07888533174991608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,0,1,fp8,fp8,0,0.0798933357000351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,128,1,float16,float16,0,0.08694400389989217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,0,1,float16,float16,0,0.08758399883906047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,128,1,float16,fp8,0,0.08589333295822144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,128,1,fp8,fp8,0,0.07915199796358745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,0,1,float16,fp8,0,0.08739733695983887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,0,1,fp8,fp8,0,0.07926933467388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,128,1,float16,float16,0,0.08683199683825175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,0,1,float16,float16,0,0.08706133564313252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,128,1,float16,fp8,0,0.08637332916259766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,128,1,fp8,fp8,0,0.07939200103282928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,0,1,float16,fp8,0,0.08784533540407817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,0,1,fp8,fp8,0,0.08081600069999695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,128,1,float16,float16,0,0.05570666491985321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,0,1,float16,float16,0,0.05620799958705902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,128,1,float16,fp8,0,0.053957333167394005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,128,1,fp8,fp8,0,0.052111998200416565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,0,1,float16,fp8,0,0.055946667989095054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,0,1,fp8,fp8,0,0.0525546669960022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,128,1,float16,float16,0,0.052383999029795326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,0,1,float16,float16,0,0.05268266797065735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,128,1,float16,fp8,0,0.05256533126036326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,128,1,fp8,fp8,0,0.04807466765244802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,0,1,float16,fp8,0,0.054010664423306785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,0,1,fp8,fp8,0,0.048901334404945374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,128,1,float16,float16,0,0.052815998593966164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,0,1,float16,float16,0,0.05323199927806854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,128,1,float16,fp8,0,0.05175999800364176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,128,1,fp8,fp8,0,0.05028266708056132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,0,1,float16,fp8,0,0.0537066658337911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,0,1,fp8,fp8,0,0.04979733129342397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,128,1,float16,float16,0,0.05343999962011973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,0,1,float16,float16,0,0.05340800185998281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,128,1,float16,fp8,0,0.05401599903901418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,128,1,fp8,fp8,0,0.0487306664387385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,0,1,float16,fp8,0,0.05358933409055074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,0,1,fp8,fp8,0,0.0499839981396993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,128,1,float16,float16,0,0.03827200084924698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,0,1,float16,float16,0,0.038975998759269714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,128,1,float16,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,128,1,fp8,fp8,0,0.03741333385308584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,0,1,float16,fp8,0,0.03825599948565165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,0,1,fp8,fp8,0,0.03745066622893015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,128,1,float16,float16,0,0.03770133356253306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,0,1,float16,float16,0,0.03752533346414566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,128,1,float16,fp8,0,0.037674665451049805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,128,1,fp8,fp8,0,0.034714666505654655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,0,1,float16,fp8,0,0.03804266701141993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,0,1,fp8,fp8,0,0.03497066597143809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,128,1,float16,float16,0,0.03755733370780945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,0,1,float16,float16,0,0.03734933336575826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,128,1,float16,fp8,0,0.03573866685231527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,128,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,0,1,float16,fp8,0,0.03700799991687139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,0,1,fp8,fp8,0,0.03591466695070267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,128,1,float16,float16,0,0.03722666700681051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,0,1,float16,float16,0,0.03763733307520548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,0,1,float16,fp8,0,0.08794132868448894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,128,1,float16,float16,0,0.2933013240496318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,0,1,float16,fp8,0,0.03892799963553747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,0,1,fp8,fp8,0,0.03513066718975703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,128,1,float16,float16,0,0.027269333600997925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,0,1,float16,float16,0,0.027093333502610523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,128,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,128,1,fp8,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,0,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,128,1,float16,float16,0,0.026591998835404713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,128,1,float16,fp8,0,0.02646933247645696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,128,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,0,1,float16,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,0,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,128,1,float16,float16,0,0.026778665681680042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,128,1,float16,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,128,1,fp8,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,128,1,float16,fp8,0,0.02585600068171819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,128,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,0,1,float16,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,128,1,float16,float16,0,0.025663999219735462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,0,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,128,1,float16,fp8,0,0.02554133286078771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,0,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,0,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,128,1,float16,float16,0,1.0107253392537434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,0,1,float16,float16,0,0.025653332471847534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,128,1,float16,fp8,0,1.0061226685841878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,0,1,float16,fp8,0,0.9906986554463705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,0,1,float16,float16,0,0.9936426480611166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,0,1,fp8,fp8,0,0.9101440111796061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,128,1,float16,float16,0,1.014250675837199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,128,1,fp8,fp8,0,0.9365333716074625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,0,1,float16,float16,0,0.9979893366495768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,128,1,float16,fp8,0,1.0111146767934163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,0,1,float16,fp8,0,0.9939253330230713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,128,1,float16,float16,0,1.0249760150909424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,0,1,float16,float16,0,1.0146026611328125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,128,1,float16,fp8,0,1.0217599868774414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,128,1,fp8,fp8,0,0.9802560011545817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,0,1,fp8,fp8,0,0.9516533215840658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,0,1,float16,fp8,0,1.001354694366455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,0,1,fp8,fp8,0,0.9354666868845621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,0,1,float16,float16,0,0.560538649559021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,128,1,float16,fp8,0,0.555295983950297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,128,1,fp8,fp8,0,0.5473493337631226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,0,1,float16,fp8,0,0.5524106820424398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,0,1,fp8,fp8,0,0.5358560085296631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,128,1,float16,float16,0,0.514245351155599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,0,1,float16,float16,0,0.5054986476898193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,128,1,float16,fp8,0,0.5131253401438395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,128,1,fp8,fp8,0,0.4701919953028361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,0,1,float16,fp8,0,0.504202683766683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,0,1,fp8,fp8,0,0.45896001656850177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,128,1,fp8,fp8,0,0.9663413365681967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,0,1,float16,float16,0,0.5086506605148315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,128,1,float16,fp8,0,0.5159680048624674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,128,1,float16,float16,0,0.5691359837849935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,0,1,float16,fp8,0,0.5088106791178385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,0,1,fp8,fp8,0,0.4638346831003825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,128,1,float16,float16,0,0.5246666669845581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,0,1,float16,float16,0,0.5148746569951376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,128,1,float16,float16,0,0.5154773394266764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,128,1,float16,fp8,0,0.5230986674626669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,128,1,fp8,fp8,0,0.4821386734644572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,0,1,float16,fp8,0,0.5120586554209391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,0,1,fp8,fp8,0,0.47194135189056396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,128,1,float16,float16,0,0.2950826684633891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,128,1,fp8,fp8,0,0.4734773238499959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,0,1,float16,float16,0,0.29155733187993366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,128,1,float16,fp8,0,0.2901279926300049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,128,1,fp8,fp8,0,0.28354666630427044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,0,1,float16,fp8,0,0.28463466962178546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,0,1,fp8,fp8,0,0.2776426672935486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,128,1,float16,float16,0,0.2679520050684611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,0,1,float16,float16,0,0.2635680039723714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,128,1,float16,fp8,0,0.2664426763852437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,128,1,fp8,fp8,0,0.24424533049265543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,0,1,float16,fp8,0,0.26288533210754395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,128,1,float16,float16,0,0.2690986593564351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,0,1,float16,float16,0,0.26499734322230023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,128,1,float16,fp8,0,0.26945066452026367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,128,1,fp8,fp8,0,0.24804266293843588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,0,1,float16,fp8,0,0.26315732796986896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,0,1,fp8,fp8,0,0.24099733432133993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,128,1,float16,float16,0,0.2733493248621623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,0,1,float16,float16,0,0.2683680057525635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,128,1,float16,fp8,0,0.2786399920781453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,128,1,fp8,fp8,0,0.25165865818659466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,0,1,float16,fp8,0,0.26827200253804523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,0,1,fp8,fp8,0,0.2453920046488444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,128,1,float16,float16,0,0.15762666861216226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,0,1,float16,float16,0,0.15574933091799417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,128,1,float16,fp8,0,0.15549332896868387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,128,1,fp8,fp8,0,0.15345600247383118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,0,1,float16,fp8,0,0.15332800149917603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,0,1,fp8,fp8,0,0.1511626640955607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,128,1,float16,float16,0,0.14221866925557455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,0,1,float16,float16,0,0.14062399665514627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,128,1,float16,fp8,0,0.14284800489743552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,128,1,fp8,fp8,0,0.1309813360373179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,0,1,float16,fp8,0,0.14036267002423605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,0,1,fp8,fp8,0,0.1286240021387736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,128,1,float16,float16,0,0.14243732889493307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,0,1,float16,float16,0,0.1406880021095276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,128,1,float16,fp8,0,0.14414933323860168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,128,1,fp8,fp8,0,0.1327306628227234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,0,1,float16,fp8,0,0.1404159963130951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,0,1,fp8,fp8,0,0.12994133432706198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,128,1,float16,float16,0,0.14591999848683676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,0,1,float16,float16,0,0.14289066195487976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,128,1,float16,fp8,0,0.14653333028157553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,128,1,fp8,fp8,0,0.1365013321240743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,0,1,float16,fp8,0,0.14396799604098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,0,1,fp8,fp8,0,0.13427733381589255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,0,1,float16,float16,0,0.08794666330019633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,128,1,float16,fp8,0,0.08892800410588582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,128,1,fp8,fp8,0,0.09025599559148152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,0,1,float16,fp8,0,0.08757332960764568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,0,1,fp8,fp8,0,0.08847467104593913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,128,1,float16,float16,0,0.08195200065771739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,0,1,float16,float16,0,0.07993066807587941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,128,1,float16,fp8,0,0.08192533254623413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,128,1,fp8,fp8,0,0.07504533231258392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,0,1,float16,fp8,0,0.08099199831485748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,0,1,fp8,fp8,0,0.07256533205509186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,128,1,float16,float16,0,0.08080533146858215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,0,1,float16,float16,0,0.0800853321949641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,128,1,float16,fp8,0,0.08111466467380524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,128,1,fp8,fp8,0,0.07524799803892772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,0,1,float16,fp8,0,0.08125333487987518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,0,1,fp8,fp8,0,0.07315200070540111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,128,1,float16,float16,0,0.0828959991534551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,128,1,float16,float16,0,0.09021866321563721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,128,1,float16,fp8,0,0.08177599807580312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,0,1,fp8,fp8,0,0.2387253244717916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,0,1,float16,fp8,0,0.0806879997253418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,0,1,fp8,fp8,0,0.07472533484299977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,0,1,float16,float16,0,0.04974400003751119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,128,1,float16,fp8,0,0.049695998430252075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,128,1,fp8,fp8,0,0.04831466575463613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,0,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,0,1,fp8,fp8,0,0.04810666541258494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,128,1,float16,float16,0,0.04926399886608124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,0,1,float16,float16,0,0.048245335618654885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,128,1,float16,fp8,0,0.04825599988301595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,128,1,fp8,fp8,0,0.044533332188924156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,0,1,float16,fp8,0,0.04766400158405304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,0,1,fp8,fp8,0,0.04401599864164988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,128,1,float16,float16,0,0.04901333153247833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,0,1,float16,float16,0,0.04817600051561991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,128,1,float16,fp8,0,0.04909333089987437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,128,1,fp8,fp8,0,0.04600533346335093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,0,1,float16,fp8,0,0.04763199885686239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,0,1,fp8,fp8,0,0.04424533247947693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,128,1,float16,float16,0,0.049738665421803795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,128,1,float16,float16,0,0.050757333636283875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,0,1,float16,float16,0,0.04796266555786133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,128,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,128,1,fp8,fp8,0,0.04558399816354116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,0,1,float16,fp8,0,0.04804799954096476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,0,1,fp8,fp8,0,0.043968002001444496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,128,1,float16,float16,0,0.03621866554021835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,0,1,float16,float16,0,0.035589332381884255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,128,1,float16,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,128,1,fp8,fp8,0,0.03412266572316488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,0,1,float16,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,0,1,fp8,fp8,0,0.035258665680885315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,128,1,float16,float16,0,0.03473600000143051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,0,1,float16,float16,0,0.03508266558249792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,128,1,float16,fp8,0,0.03551466763019562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,128,1,fp8,fp8,0,0.03379733363787333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,0,1,float16,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,0,1,fp8,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,128,1,float16,float16,0,0.03551999976237615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,0,1,float16,float16,0,0.03398400048414866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,128,1,float16,fp8,0,0.03603200117746989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,128,1,fp8,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,0,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,0,1,fp8,fp8,0,0.03163733333349228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,128,1,float16,float16,0,0.03562666724125544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,0,1,float16,float16,0,0.034202667574087776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,128,1,float16,fp8,0,0.03555733213822047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,128,1,fp8,fp8,0,0.033759998778502144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,0,1,float16,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,0,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,128,1,float16,float16,0,0.025199999411900837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,0,1,float16,float16,0,0.023226665953795116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,128,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,128,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,0,1,float16,fp8,0,0.024298667907714844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,0,1,fp8,fp8,0,0.024959998826185863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,128,1,float16,float16,0,0.0235359991590182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,0,1,float16,float16,0,0.023647998770078022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,128,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,128,1,fp8,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,0,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,0,1,float16,float16,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,128,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,128,1,fp8,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,0,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,0,1,float16,float16,0,0.08214933176835378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,128,1,fp8,fp8,0,0.07561600208282471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,0,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,128,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,128,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,0,1,fp8,fp8,0,0.023728000621000927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,128,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,0,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,128,1,float16,float16,0,0.02568000058333079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,128,1,float16,fp8,0,0.02197333425283432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,128,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,0,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,128,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,128,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,0,1,float16,fp8,0,0.020703999946514767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,0,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,128,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,0,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,128,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,128,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,0,1,fp8,fp8,0,0.01964266722400983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,128,1,float16,float16,0,0.5455626646677653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,128,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,128,1,float16,fp8,0,0.5444373289744059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,128,1,fp8,fp8,0,0.50382399559021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,0,1,float16,fp8,0,0.5429973204930624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,0,1,float16,fp8,0,0.02199466774861018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,128,1,float16,float16,0,0.5483680168787638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,0,1,float16,float16,0,0.5479040145874023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,0,1,float16,float16,0,0.549343983332316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,128,1,float16,fp8,0,0.5474773248036703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,128,1,fp8,fp8,0,0.5111466646194458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,0,1,float16,fp8,0,0.5471253395080566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,0,1,fp8,fp8,0,0.5073279937108358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,0,1,fp8,fp8,0,0.5025706688563029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,0,1,float16,float16,0,0.5552106698354086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,128,1,float16,fp8,0,0.5508960088094076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,128,1,fp8,fp8,0,0.5147626797358195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,0,1,float16,fp8,0,0.553002675374349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,128,1,float16,float16,0,0.30821333328882855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,0,1,float16,float16,0,0.3089546759923299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,128,1,fp8,fp8,0,0.2951520085334778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,128,1,float16,float16,0,0.557370662689209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,0,1,float16,fp8,0,0.30364267031351727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,0,1,fp8,fp8,0,0.2960746685663859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,128,1,float16,float16,0,0.2815893292427063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,0,1,float16,float16,0,0.28196799755096436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,0,1,fp8,fp8,0,0.5169386863708496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,128,1,float16,fp8,0,0.2816426753997803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,128,1,fp8,fp8,0,0.25861867268880206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,0,1,float16,fp8,0,0.2815306584040324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,0,1,fp8,fp8,0,0.2587946653366089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,128,1,float16,float16,0,0.28352532784144086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,0,1,float16,float16,0,0.28272000948588055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,128,1,float16,fp8,0,0.2816373308499654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,128,1,fp8,fp8,0,0.26362667481104535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,0,1,float16,fp8,0,0.28193066517512005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,128,1,float16,float16,0,0.2862559954325358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,0,1,float16,float16,0,0.28622933228810626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,128,1,float16,fp8,0,0.28522666295369464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,128,1,fp8,fp8,0,0.266154666741689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,0,1,float16,fp8,0,0.28443199396133423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,128,1,float16,fp8,0,0.30298133691151935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,128,1,float16,float16,0,0.16286399960517883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,0,1,float16,float16,0,0.16269866625467935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,128,1,float16,fp8,0,0.16085333625475565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,128,1,fp8,fp8,0,0.15863999724388123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,0,1,float16,fp8,0,0.16056533654530844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,0,1,fp8,fp8,0,0.261189341545105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,128,1,float16,float16,0,0.15068800250689188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,0,1,float16,float16,0,0.14991999665896097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,128,1,float16,fp8,0,0.1504853367805481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,128,1,fp8,fp8,0,0.13700800140698752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,0,1,float16,fp8,0,0.14890666802724203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,0,1,fp8,fp8,0,0.13727466265360513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,128,1,float16,float16,0,0.14970133701960245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,0,1,float16,float16,0,0.15032000343004862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,128,1,float16,fp8,0,0.14860799908638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,128,1,fp8,fp8,0,0.13953066865603128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,0,1,float16,fp8,0,0.14961600303649902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,0,1,fp8,fp8,0,0.14007467031478882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,128,1,float16,float16,0,0.15264000495274863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,0,1,float16,float16,0,0.15279466907183328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,128,1,float16,fp8,0,0.15344533324241638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,128,1,fp8,fp8,0,0.14173866311709085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,0,1,float16,fp8,0,0.15199466546376547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,0,1,fp8,fp8,0,0.14190399646759033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,128,1,float16,float16,0,0.09116799632708232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,0,1,float16,float16,0,0.09141332904497783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,128,1,float16,fp8,0,0.09129066268603007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,128,1,fp8,fp8,0,0.09178133805592854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,0,1,float16,fp8,0,0.09122133255004883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,0,1,fp8,fp8,0,0.09109333157539368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,128,1,float16,float16,0,0.08482133348782857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,0,1,float16,float16,0,0.08310933411121368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,128,1,float16,fp8,0,0.0848586658636729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,128,1,fp8,fp8,0,0.07611733178297679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,0,1,float16,fp8,0,0.0830506682395935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,0,1,fp8,fp8,0,0.07648533085982005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,128,1,float16,float16,0,0.0831573357184728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,0,1,float16,float16,0,0.08356799681981404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,0,1,fp8,fp8,0,0.2654186685880025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,128,1,fp8,fp8,0,0.07679466903209686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,0,1,float16,fp8,0,0.08313600222269694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,0,1,fp8,fp8,0,0.07683733105659485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,128,1,float16,float16,0,0.08389866352081299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,0,1,float16,float16,0,0.0846720039844513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,128,1,float16,fp8,0,0.08469333251317342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,128,1,fp8,fp8,0,0.07833066582679749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,128,1,float16,fp8,0,0.0846506655216217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,0,1,fp8,fp8,0,0.07776533563931783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,128,1,float16,float16,0,0.05315199991067251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,0,1,float16,float16,0,0.05251200000445048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,128,1,float16,fp8,0,0.05260799825191498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,0,1,float16,fp8,0,0.05276266733805338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,0,1,fp8,fp8,0,0.05275199810663859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,128,1,float16,float16,0,0.05087999999523163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,0,1,float16,fp8,0,0.08513066172599792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,128,1,float16,fp8,0,0.05028266708056132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,128,1,fp8,fp8,0,0.04593066871166229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,0,1,float16,fp8,0,0.049786667029062905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,0,1,fp8,fp8,0,0.04645333190759023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,128,1,float16,float16,0,0.049685334165891014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,0,1,float16,float16,0,0.049882665276527405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,128,1,float16,fp8,0,0.05029866596062978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,128,1,fp8,fp8,0,0.045978665351867676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,0,1,float16,float16,0,0.05080533524354299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,0,1,fp8,fp8,0,0.04683733483155569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,128,1,float16,float16,0,0.05051200091838837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,0,1,fp8,fp8,0,0.15946666399637857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,128,1,fp8,fp8,0,0.050000001986821495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,128,1,fp8,fp8,0,0.04775999983151754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,0,1,float16,fp8,0,0.05171733101209005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,0,1,fp8,fp8,0,0.04799466828505198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,128,1,float16,float16,0,0.03408533334732056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,0,1,float16,fp8,0,0.04975466430187225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,128,1,float16,fp8,0,0.03551999976237615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,128,1,fp8,fp8,0,0.033610666791598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,0,1,float16,fp8,0,0.035301332672437034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,0,1,fp8,fp8,0,0.034629332522551216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,128,1,float16,float16,0,0.033488000432650246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,0,1,float16,float16,0,0.032746667663256325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,128,1,float16,fp8,0,0.03324799984693527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,128,1,fp8,fp8,0,0.031717332700888314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,0,1,float16,fp8,0,0.034117333590984344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,0,1,fp8,fp8,0,0.0317546675602595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,128,1,float16,float16,0,0.03328000009059906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,0,1,float16,float16,0,0.03377600014209747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,128,1,float16,fp8,0,0.03324266771475474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,128,1,fp8,fp8,0,0.03133333226044973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,0,1,float16,fp8,0,0.03393599887688955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,0,1,fp8,fp8,0,0.0323840007185936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,128,1,float16,float16,0,0.03364799916744232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,0,1,float16,float16,0,0.034602666894594826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,128,1,float16,fp8,0,0.03369600077470144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,128,1,fp8,fp8,0,0.032032000521818794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,0,1,float16,fp8,0,0.03480533262093862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,128,1,float16,fp8,0,0.050026665131251015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,128,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,0,1,float16,float16,0,0.02515200028816859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,128,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,128,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,0,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,0,1,float16,float16,0,0.02384000023206075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,0,1,float16,float16,0,0.05046399931112925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,128,1,float16,fp8,0,0.023872000475724537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,128,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,0,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,0,1,fp8,fp8,0,0.02436800052722295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,128,1,float16,float16,0,0.02513066679239273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,0,1,float16,float16,0,0.02499733368555705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,128,1,float16,fp8,0,0.02407466620206833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,128,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,0,1,float16,fp8,0,0.023573334018389385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,0,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,128,1,float16,float16,0,0.024693332612514496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,0,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,128,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,128,1,fp8,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,128,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,128,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,128,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,0,1,float16,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,0,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,128,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,0,1,fp8,fp8,0,0.033439998825391136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,128,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,128,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,0,1,float16,float16,0,0.03387200087308884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,0,1,float16,fp8,0,0.01851733277241389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,0,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,128,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,128,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,128,1,float16,float16,0,0.3898186683654785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,0,1,float16,float16,0,0.39079999923706055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,128,1,float16,fp8,0,0.3898613452911377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,128,1,fp8,fp8,0,0.34932267665863037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,0,1,float16,fp8,0,0.39073065916697186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,0,1,fp8,fp8,0,0.3478240172068278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,128,1,float16,float16,0,0.3920053243637085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,0,1,float16,float16,0,0.3922079801559448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,128,1,float16,fp8,0,0.3903199831644694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,128,1,fp8,fp8,0,0.35227731863657635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,128,1,float16,float16,0,0.018650667121013004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,128,1,float16,float16,0,0.3958880106608073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,128,1,float16,fp8,0,0.39162667592366535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,128,1,fp8,fp8,0,0.3550453186035156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,0,1,float16,fp8,0,0.38890667756398517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,0,1,float16,fp8,0,0.39348268508911133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,0,1,fp8,fp8,0,0.3543253342310588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,0,1,float16,float16,0,0.39561601479848224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,0,1,float16,float16,0,0.21565866470336914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,128,1,float16,fp8,0,0.21361066897710165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,128,1,fp8,fp8,0,0.2018773357073466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,0,1,float16,fp8,0,0.213210662206014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,0,1,fp8,fp8,0,0.2017973264058431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,128,1,float16,float16,0,0.20362667242685953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,0,1,float16,float16,0,0.20393067598342896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,0,1,fp8,fp8,0,0.3510400056838989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,128,1,fp8,fp8,0,0.18151466051737467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,0,1,float16,fp8,0,0.20269334316253662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,0,1,fp8,fp8,0,0.18179200092951456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,128,1,float16,float16,0,0.20282133420308432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,0,1,float16,float16,0,0.20265066623687744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,128,1,float16,fp8,0,0.20150933663050333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,128,1,fp8,fp8,0,0.1850879987080892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,0,1,float16,fp8,0,0.20324800411860147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,0,1,fp8,fp8,0,0.183514674504598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,128,1,float16,float16,0,0.2053333322207133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,0,1,float16,float16,0,0.20525334278742471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,128,1,float16,fp8,0,0.20363734165827432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,128,1,fp8,fp8,0,0.18572799364725748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,0,1,float16,fp8,0,0.20414400100708008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,0,1,fp8,fp8,0,0.18525334199269614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,128,1,float16,float16,0,0.11613333225250244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,0,1,float16,float16,0,0.11694399515787761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,128,1,float16,fp8,0,0.11583999792734782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,128,1,fp8,fp8,0,0.11254933476448059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,0,1,float16,fp8,0,0.11607467134793599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,0,1,fp8,fp8,0,0.11346133550008138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,128,1,float16,float16,0,0.11091732978820801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,0,1,float16,float16,0,0.11005866527557373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,128,1,float16,fp8,0,0.1107306679089864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,128,1,fp8,fp8,0,0.09964799880981445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,0,1,float16,fp8,0,0.11068800091743469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,128,1,float16,fp8,0,0.20469866196314493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,0,1,fp8,fp8,0,0.09943999846776326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,128,1,float16,float16,0,0.11024533708890279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,0,1,float16,float16,0,0.10961600144704182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,128,1,float16,fp8,0,0.10941867033640544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,128,1,fp8,fp8,0,0.09802666306495667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,0,1,float16,fp8,0,0.10910399754842122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,0,1,fp8,fp8,0,0.0986293355623881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,128,1,float16,float16,0,0.11005333065986633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,0,1,float16,float16,0,0.10999466975529988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,128,1,float16,fp8,0,0.11008000373840332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,128,1,fp8,fp8,0,0.09891200065612793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,0,1,float16,fp8,0,0.11107732852300008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,0,1,fp8,fp8,0,0.09896533687909444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,0,1,float16,float16,0,0.06483733157316844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,128,1,float16,fp8,0,0.06458133459091187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,128,1,fp8,fp8,0,0.06021333237489065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,0,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,0,1,fp8,fp8,0,0.0618399977684021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,128,1,float16,float16,0,0.21571199099222818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,0,1,float16,float16,0,0.06272000074386597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,128,1,float16,fp8,0,0.062405332922935486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,128,1,float16,float16,0,0.06443200012048085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,128,1,fp8,fp8,0,0.05691199998060862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,0,1,float16,fp8,0,0.06428800026575725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,0,1,fp8,fp8,0,0.05769066512584686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,128,1,float16,float16,0,0.06306666632493337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,0,1,float16,float16,0,0.06359999875227611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,128,1,float16,fp8,0,0.06237866481145223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,128,1,fp8,fp8,0,0.05826666454474131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,0,1,float16,fp8,0,0.06249066690603892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,0,1,fp8,fp8,0,0.05815466741720835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,128,1,float16,float16,0,0.0642133355140686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,0,1,float16,float16,0,0.06367999811967213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,128,1,float16,float16,0,0.06369600196679433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,128,1,fp8,fp8,0,0.05726400017738342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,0,1,float16,fp8,0,0.06344533463319142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,0,1,fp8,fp8,0,0.05760000149408976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,128,1,float16,float16,0,0.04164266586303711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,0,1,float16,float16,0,0.04115733255942663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,128,1,float16,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,128,1,fp8,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,0,1,fp8,fp8,0,0.03997333347797394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,128,1,float16,float16,0,0.040474665661652885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,0,1,float16,float16,0,0.03982933362325033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,128,1,float16,fp8,0,0.041509332756201424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,128,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,0,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,0,1,fp8,fp8,0,0.03853866706291834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,128,1,float16,float16,0,0.040261333187421165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,0,1,float16,float16,0,0.04117333392302195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,128,1,float16,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,128,1,fp8,fp8,0,0.03793599953254064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,0,1,float16,fp8,0,0.041722665230433144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,0,1,fp8,fp8,0,0.038880000511805214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,128,1,float16,float16,0,0.04085333396991094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,0,1,float16,float16,0,0.03976000100374222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,128,1,float16,fp8,0,0.0414986660083135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,128,1,fp8,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,0,1,float16,fp8,0,0.039893334110577904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,0,1,fp8,fp8,0,0.03877866764863332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,128,1,float16,float16,0,0.027098665634791057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,128,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,0,1,float16,fp8,0,0.027744000156720478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,0,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,128,1,float16,float16,0,0.02717866748571396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,0,1,float16,float16,0,0.026613332331180573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,128,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,128,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,0,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,128,1,float16,float16,0,0.02722666660944621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,0,1,float16,float16,0,0.02773866554101308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,128,1,float16,fp8,0,0.026586666703224182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,128,1,fp8,fp8,0,0.025775998830795288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,0,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,0,1,fp8,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,128,1,float16,float16,0,0.02739199995994568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,0,1,float16,float16,0,0.02589866767326991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,128,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,128,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,0,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,128,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,0,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,128,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,0,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,0,1,fp8,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,128,1,float16,fp8,0,0.06435200075308482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,0,1,float16,float16,0,0.020128000527620316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,128,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,0,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,0,1,float16,float16,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,0,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,128,1,float16,float16,0,0.021221332252025604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,128,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,128,1,float16,fp8,0,0.017957333475351334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,128,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,128,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,0,1,float16,fp8,0,0.04196266829967499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,128,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,0,1,fp8,fp8,0,0.018511999398469925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,128,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,128,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,128,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,128,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,128,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,128,1,fp8,fp8,0,0.015728000551462173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,128,1,float16,float16,0,0.3102293411890666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,0,1,float16,float16,0,0.3104426662127177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,128,1,float16,fp8,0,0.31041600306828815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,128,1,fp8,fp8,0,0.2739680012067159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,128,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,0,1,fp8,fp8,0,0.2740853428840637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,128,1,float16,float16,0,0.31010667483011883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,0,1,float16,float16,0,0.31086399157842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,128,1,float16,fp8,0,0.3084106643994649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,128,1,fp8,fp8,0,0.2766079902648926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,0,1,float16,fp8,0,0.3089066743850708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,0,1,fp8,fp8,0,0.2755413254102071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,0,1,float16,fp8,0,0.3099840084711711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,128,1,float16,float16,0,0.31276800235112506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,0,1,float16,float16,0,0.3125813404719035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,128,1,fp8,fp8,0,0.27931733926137287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,0,1,float16,fp8,0,0.31204267342885333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,0,1,fp8,fp8,0,0.27876800298690796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,128,1,float16,float16,0,0.1705013314882914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,0,1,float16,float16,0,0.17069866259892783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,128,1,float16,fp8,0,0.16929600636164346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,128,1,fp8,fp8,0,0.15867732961972555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,0,1,float16,fp8,0,0.16894932587941489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,0,1,fp8,fp8,0,0.1588106652100881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,128,1,float16,float16,0,0.16267200311024985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,0,1,float16,float16,0,0.16356799999872842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,128,1,float16,fp8,0,0.16264533003171286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,128,1,fp8,fp8,0,0.14461867014567056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,0,1,float16,fp8,0,0.16289066274960837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,0,1,fp8,fp8,0,0.14380266269048056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,128,1,float16,float16,0,0.1639946699142456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,0,1,float16,float16,0,0.16250666975975037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,0,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,128,1,float16,fp8,0,0.16312000155448914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,128,1,fp8,fp8,0,0.14410133163134256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,0,1,float16,fp8,0,0.16262400150299072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,128,1,float16,float16,0,0.16328000028928122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,0,1,float16,float16,0,0.16356799999872842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,128,1,float16,fp8,0,0.16400532921155295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,128,1,float16,fp8,0,0.3105333248774211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,0,1,float16,fp8,0,0.16296533743540445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,0,1,fp8,fp8,0,0.1443946659564972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,128,1,float16,float16,0,0.09088533123334248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,0,1,float16,float16,0,0.08918399612108867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,128,1,float16,fp8,0,0.09114133318265279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,128,1,fp8,fp8,0,0.08347200353940327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,0,1,float16,fp8,0,0.09059733152389526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,0,1,fp8,fp8,0,0.08486933509508769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,128,1,float16,float16,0,0.08813866972923279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,0,1,float16,float16,0,0.08902933200200398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,128,1,float16,fp8,0,0.0888426701227824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,128,1,fp8,fp8,0,0.08084266881148021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,0,1,float16,fp8,0,0.08839999636014302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,0,1,fp8,fp8,0,0.08075200021266937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,128,1,fp8,fp8,0,0.14616533120473227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,0,1,float16,float16,0,0.0885759989420573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,128,1,float16,fp8,0,0.08918933073679607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,128,1,fp8,fp8,0,0.08146133522192638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,0,1,float16,fp8,0,0.08920533458391826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,0,1,fp8,fp8,0,0.08075733482837677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,128,1,float16,float16,0,0.08901866277058919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,0,1,float16,float16,0,0.09015466769536336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,128,1,float16,fp8,0,0.08905067046483357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,128,1,float16,float16,0,0.08906666437784831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,128,1,fp8,fp8,0,0.08208000163237254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,0,1,float16,fp8,0,0.08961600065231323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,0,1,fp8,fp8,0,0.0812799980243047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,128,1,float16,float16,0,0.05390933156013489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,0,1,float16,float16,0,0.05420266588528951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,128,1,float16,fp8,0,0.05399466554323832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,128,1,fp8,fp8,0,0.05129600067933401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,0,1,float16,fp8,0,0.055306668082873024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,0,1,fp8,fp8,0,0.052015999952952065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,128,1,float16,float16,0,0.05401599903901418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,0,1,float16,float16,0,0.054234668612480164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,128,1,float16,fp8,0,0.05375466744105021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,128,1,fp8,fp8,0,0.050053333242734276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,0,1,float16,fp8,0,0.05390933156013489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,0,1,fp8,fp8,0,0.05026666820049286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,128,1,float16,float16,0,0.052501335740089417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,0,1,float16,float16,0,0.0537066658337911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,128,1,float16,fp8,0,0.052789335449536644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,128,1,fp8,fp8,0,0.05013866722583771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,0,1,float16,fp8,0,0.05277866621812185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,128,1,float16,float16,0,0.052704001466433205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,0,1,float16,float16,0,0.05230933427810669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,128,1,float16,fp8,0,0.05359466870625814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,128,1,fp8,fp8,0,0.05018133421738943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,0,1,float16,fp8,0,0.05425600210825602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,0,1,fp8,fp8,0,0.05002133548259735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,128,1,float16,float16,0,0.03565866748491923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,0,1,float16,float16,0,0.03540800015131632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,128,1,float16,fp8,0,0.03586666782697042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,128,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,0,1,float16,fp8,0,0.03570133447647095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,0,1,fp8,fp8,0,0.03369600077470144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,128,1,float16,float16,0,0.03381866713364919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,0,1,float16,float16,0,0.03359466542800268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,128,1,float16,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,128,1,fp8,fp8,0,0.03325333446264267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,0,1,float16,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,0,1,fp8,fp8,0,0.033200000723203026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,128,1,float16,float16,0,0.035018667578697205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,0,1,float16,float16,0,0.03391999999682108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,128,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,128,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,0,1,fp8,fp8,0,0.031658666829268135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,0,1,fp8,fp8,0,0.14542399843533835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,128,1,float16,float16,0,0.033589333295822144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,0,1,float16,float16,0,0.033759998778502144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,128,1,fp8,fp8,0,0.031445334355036415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,0,1,float16,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,0,1,fp8,fp8,0,0.032655999064445496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,128,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,128,1,float16,fp8,0,0.023589332898457844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,128,1,fp8,fp8,0,0.022650666534900665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,0,1,float16,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,128,1,float16,fp8,0,0.03473600000143051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,128,1,float16,float16,0,0.023226665953795116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,0,1,float16,float16,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,128,1,float16,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,0,1,fp8,fp8,0,0.04972266654173533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,0,1,float16,fp8,0,0.02367466688156128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,0,1,float16,fp8,0,0.03358400116364161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,128,1,float16,float16,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,0,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,128,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,0,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,128,1,fp8,fp8,0,0.021898667017618816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,128,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,128,1,fp8,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,0,1,float16,fp8,0,0.023743999501069386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,0,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,128,1,float16,float16,0,0.019509332875410717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,0,1,float16,float16,0,0.019434666881958645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,128,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,128,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,0,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,128,1,float16,float16,0,0.0195573332409064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,0,1,float16,float16,0,0.018853332847356796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,128,1,float16,fp8,0,0.024192000428835552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,128,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,128,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,0,1,float16,float16,0,0.019493332753578823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,128,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,0,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,0,1,fp8,fp8,0,0.0201706662774086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,128,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,128,1,float16,float16,0,0.02350933353106181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,0,1,fp8,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,128,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,0,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,128,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,128,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,0,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,128,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,128,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,128,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,128,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,128,1,fp8,fp8,0,0.015802666544914246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,128,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,128,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,128,1,float16,float16,0,0.26874132951100665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,128,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,0,1,float16,float16,0,0.2690133253733317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,128,1,float16,fp8,0,0.26948267221450806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,128,1,fp8,fp8,0,0.24099733432133993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,0,1,float16,fp8,0,0.2690986593564351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,0,1,fp8,fp8,0,0.24064532915751138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,128,1,float16,float16,0,0.2678080002466838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,0,1,float16,float16,0,0.26948267221450806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,128,1,float16,fp8,0,0.26734934250513714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,128,1,fp8,fp8,0,0.2416213353474935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,0,1,float16,fp8,0,0.268832008043925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,0,1,fp8,fp8,0,0.24018667141596475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,128,1,float16,float16,0,0.27057600021362305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,0,1,float16,float16,0,0.26979732513427734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,128,1,float16,fp8,0,0.26925333340962726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,0,1,float16,fp8,0,0.2681173284848531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,0,1,fp8,fp8,0,0.24074665705362955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,128,1,float16,float16,0,0.14195199807484946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,0,1,float16,float16,0,0.14219733079274496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,128,1,float16,fp8,0,0.1418506701787313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,128,1,fp8,fp8,0,0.13201066851615906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,0,1,float16,fp8,0,0.14231999715169272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,128,1,float16,float16,0,0.14075199762980142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,0,1,float16,float16,0,0.1400373379389445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,128,1,float16,fp8,0,0.14038399855295816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,128,1,fp8,fp8,0,0.12869333227475485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,0,1,float16,fp8,0,0.14061333735783896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,0,1,fp8,fp8,0,0.12829867005348206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,128,1,float16,float16,0,0.14006400108337402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,0,1,float16,float16,0,0.1402186652024587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,128,1,float16,fp8,0,0.13966400424639383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,128,1,fp8,fp8,0,0.12822399536768594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,0,1,float16,fp8,0,0.14019200205802917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,0,1,fp8,fp8,0,0.1283573309580485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,0,1,fp8,fp8,0,0.13205867012341818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,128,1,float16,float16,0,0.14030399918556213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,128,1,float16,fp8,0,0.1402506629625956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,128,1,fp8,fp8,0,0.12846400340398154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,128,1,fp8,fp8,0,0.23968533674875894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,0,1,fp8,fp8,0,0.1288746694723765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,0,1,float16,float16,0,0.07959466675917308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,128,1,float16,fp8,0,0.08039466540018718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,128,1,fp8,fp8,0,0.07509333391984303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,0,1,float16,fp8,0,0.08097066481908162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,0,1,float16,float16,0,0.14037332932154337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,0,1,fp8,fp8,0,0.07633600135644276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,0,1,float16,fp8,0,0.14019733667373657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,128,1,float16,float16,0,0.07981866598129272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,128,1,float16,float16,0,0.07963733375072479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,128,1,float16,fp8,0,0.07980266710122426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,128,1,fp8,fp8,0,0.07398400207360585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,0,1,float16,fp8,0,0.07960000137488048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,0,1,fp8,fp8,0,0.07394133508205414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,128,1,float16,float16,0,0.07897066573301952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,0,1,float16,float16,0,0.0790719985961914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,128,1,float16,fp8,0,0.07895466685295105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,128,1,fp8,fp8,0,0.07388799885908763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,0,1,float16,fp8,0,0.07901866734027863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,0,1,fp8,fp8,0,0.07267733414967854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,128,1,float16,float16,0,0.07925866544246674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,0,1,float16,float16,0,0.078575998544693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,128,1,float16,fp8,0,0.08089066545168559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,128,1,fp8,fp8,0,0.0735040009021759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,0,1,float16,float16,0,0.0784746656815211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,0,1,fp8,fp8,0,0.07455466687679291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,128,1,float16,float16,0,0.0468800018231074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,0,1,float16,float16,0,0.04747200012207031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,128,1,float16,fp8,0,0.04772266745567322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,128,1,fp8,fp8,0,0.04456533491611481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,0,1,float16,fp8,0,0.047695999344189964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,0,1,fp8,fp8,0,0.04528533418973287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,128,1,float16,float16,0,0.04774933556715647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,0,1,float16,float16,0,0.04602666695912679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,128,1,float16,fp8,0,0.04780800143877665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,128,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,0,1,float16,fp8,0,0.04771199822425842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,128,1,float16,float16,0,0.04771199822425842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,0,1,float16,float16,0,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,128,1,float16,fp8,0,0.047968000173568726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,128,1,fp8,fp8,0,0.04354133208592733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,0,1,float16,fp8,0,0.04781333108743032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,0,1,fp8,fp8,0,0.043951998154322304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,128,1,float16,float16,0,0.04595733185609182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,0,1,float16,float16,0,0.04765866696834564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,128,1,float16,fp8,0,0.0459199994802475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,128,1,fp8,fp8,0,0.04354666670163473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,0,1,float16,fp8,0,0.046037331223487854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,0,1,fp8,fp8,0,0.04374399781227112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,128,1,float16,float16,0,0.02958933264017105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,0,1,float16,float16,0,0.02985599885384242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,128,1,float16,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,128,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,0,1,float16,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,0,1,fp8,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,128,1,float16,float16,0,0.029658667743206024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,0,1,float16,float16,0,0.031018666923046112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,128,1,float16,fp8,0,0.029717333614826202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,128,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,0,1,float16,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,0,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,128,1,float16,float16,0,0.0315786674618721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,0,1,float16,float16,0,0.029530666768550873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,128,1,float16,fp8,0,0.031248000760873158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,128,1,fp8,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,0,1,float16,fp8,0,0.0310506671667099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,0,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,128,1,float16,float16,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,0,1,float16,float16,0,0.02939733366171519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,128,1,float16,fp8,0,0.029482667644818623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,128,1,fp8,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,0,1,float16,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,0,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,128,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,0,1,float16,fp8,0,0.08050666749477386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,128,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,0,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,128,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,0,1,float16,float16,0,0.023525332411130268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,128,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,128,1,fp8,fp8,0,0.022848000129063923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,0,1,fp8,fp8,0,0.04443199932575226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,128,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,128,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,0,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,0,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,128,1,float16,float16,0,0.022783999641736347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,128,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,0,1,float16,fp8,0,0.02231466770172119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,128,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,128,1,fp8,fp8,0,0.02163733293612798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,128,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,128,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,0,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,128,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,128,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,128,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,128,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,0,1,float16,float16,0,0.018629333625237148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,128,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,128,1,fp8,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,0,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,128,1,float16,float16,0,0.020703999946514767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,128,1,float16,fp8,0,0.020421333611011505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,128,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,0,1,float16,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,128,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,128,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,0,1,float16,fp8,0,0.015909332782030106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,128,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,128,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,128,1,fp8,fp8,0,0.01575999955336253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,0,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,128,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,128,1,float16,fp8,0,0.015754666179418564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,0,1,fp8,fp8,0,0.01600533351302147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,128,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,128,1,float16,float16,0,0.22830933332443237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,0,1,float16,float16,0,0.2265226642290751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,128,1,float16,fp8,0,0.22821333010991415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,128,1,fp8,fp8,0,0.20563199122746786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,0,1,float16,fp8,0,0.22819733619689941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,64,0,1,fp8,fp8,0,0.2075093388557434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,128,1,float16,float16,0,0.22842133045196533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,0,1,float16,float16,0,0.22805867592493692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,128,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,128,1,float16,fp8,0,0.2284640073776245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,0,1,float16,fp8,0,0.22803199291229248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,0,1,fp8,fp8,0,0.20747200647989908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,128,1,float16,float16,0,0.2283786733945211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,0,1,float16,float16,0,0.22604266802469888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,128,1,float16,fp8,0,0.22793066501617432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,128,1,fp8,fp8,0,0.20588266849517822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,0,1,float16,fp8,0,0.22823999325434366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,64,0,1,fp8,fp8,0,0.20600533485412598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,0,0.1216213305791219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,128,1,float16,fp8,0,0.12193066875139873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,128,1,fp8,fp8,0,0.11150399843851726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,0,0.12173866232236226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,0,1,fp8,fp8,0,0.11136533816655476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,64,128,1,fp8,fp8,0,0.20589866240819296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,0,1,float16,float16,0,0.1202186644077301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,128,1,float16,fp8,0,0.12164800365765889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,128,1,fp8,fp8,0,0.11142399907112122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,0,1,float16,fp8,0,0.12133866548538208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,0,1,fp8,fp8,0,0.11122666796048482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,128,1,float16,float16,0,0.1218346655368805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,0,1,float16,float16,0,0.12191466490427653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,128,1,float16,fp8,0,0.12170132994651794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,128,1,fp8,fp8,0,0.11153599619865417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,0,1,float16,fp8,0,0.1216159959634145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,64,0,1,fp8,fp8,0,0.10941333572069804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,128,1,float16,float16,0,0.12134400010108948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,64,128,1,float16,float16,0,0.12174399693806966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,0,1,float16,float16,0,0.12151466806729634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,128,1,float16,fp8,0,0.1220746636390686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,128,1,fp8,fp8,0,0.11133866508801778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,0,1,float16,fp8,0,0.12203733126322429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,64,0,1,fp8,fp8,0,0.1116480032602946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,128,1,float16,float16,0,0.06880533198515575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,0,0.06885333359241486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,64,128,1,float16,float16,0,0.12168533603350322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,128,1,fp8,fp8,0,0.06418666740258534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,0,0.06856533388296764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,0,1,fp8,fp8,0,0.062405332922935486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,128,1,float16,float16,0,0.06853866577148438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,0,1,float16,float16,0,0.06863466898600261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,128,1,float16,fp8,0,0.068271999557813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,128,1,fp8,fp8,0,0.06228266656398773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,0,1,float16,fp8,0,0.06821333368619283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,64,0,1,fp8,fp8,0,0.06251733501752217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,128,1,float16,float16,0,0.06824533144632976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,0,1,float16,float16,0,0.06867200136184692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,128,1,float16,fp8,0,0.06871466835339864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,128,1,fp8,fp8,0,0.06449066599210103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,0,1,float16,fp8,0,0.06862933437029521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,64,0,1,fp8,fp8,0,0.06404266754786174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,64,128,1,float16,fp8,0,0.06863999863465627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,128,1,float16,float16,0,0.06861866513888042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,128,1,float16,fp8,0,0.06844800213972728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,128,1,fp8,fp8,0,0.0641599992911021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,0,1,float16,fp8,0,0.06890666484832764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,0,1,fp8,fp8,0,0.06413866579532623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,128,1,float16,float16,0,0.041663999358812966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,128,1,float16,fp8,0,0.04192533095677694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,128,1,fp8,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,0,1,fp8,fp8,0,0.040933333337306976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,128,1,float16,float16,0,0.04162133236726125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,128,1,float16,fp8,0,0.04153066625197729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,128,1,fp8,fp8,0,0.03994666785001755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,0,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,64,0,1,fp8,fp8,0,0.039919999738534294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,128,1,float16,float16,0,0.04187199970086416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,0,1,float16,float16,0,0.04154666761557261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,64,0,1,float16,float16,0,0.06831466654936473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,128,1,fp8,fp8,0,0.0395413339138031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,0,1,float16,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,0,1,fp8,fp8,0,0.03937066594759623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,128,1,float16,float16,0,0.041759997606277466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,0,1,float16,float16,0,0.0415040006240209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,128,1,float16,fp8,0,0.04374399781227112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,128,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,0,1,float16,fp8,0,0.04368533194065094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,64,0,1,fp8,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,128,1,float16,float16,0,0.027989332874615986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,0,0.02752533306678136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,128,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,128,1,fp8,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,0,0.02914133419593175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,64,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,128,1,float16,float16,0,0.027477333943049114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,128,1,float16,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,64,128,1,float16,fp8,0,0.04151466737190882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,0,1,float16,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,0,1,fp8,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,128,1,float16,float16,0,0.02914133419593175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,0,1,float16,float16,0,0.029359998802344005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,128,1,float16,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,128,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,0,1,float16,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,64,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,128,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,0,1,float16,float16,0,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,128,1,float16,fp8,0,0.02958400050799052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,128,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,0,1,float16,fp8,0,0.02916266769170761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,64,0,1,fp8,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,128,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,0,0.021029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,128,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,128,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,64,0,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,128,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,0,1,float16,float16,0,0.02182399978240331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,128,1,float16,fp8,0,0.022384000321229298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,0,1,float16,fp8,0,0.022831998765468597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,64,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,128,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,128,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,64,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,0,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,128,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,64,128,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,128,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,0,0.04197866717974345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,128,1,float16,fp8,0,0.018058666338523228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,128,1,fp8,fp8,0,0.01775466650724411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,0,1,fp8,fp8,0,0.018133333573738735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,128,1,float16,fp8,0,0.01977066695690155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,128,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,0,1,float16,fp8,0,0.019760000209013622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,64,0,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,128,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,0,0.017925333231687546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,128,1,fp8,fp8,0,0.01782400036851565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,0,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,0,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,128,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,64,0,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,128,1,fp8,fp8,0,0.017738666385412216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,64,0,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,128,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,64,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,128,1,float16,fp8,0,0.015919999529918034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,128,1,fp8,fp8,0,0.01602666700879733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,0,1,float16,fp8,0,0.016458666572968166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,64,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,128,1,fp8,fp8,0,0.016314666718244553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,128,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,64,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,float16,0,2.1990559895833335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,fp8,0,2.2133545875549316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,128,1,fp8,fp8,0,1.9985013008117676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,float16,0,2.2144266764322915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,float16,0,13.729114532470703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,0,1,fp8,fp8,0,12.477306365966797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,fp8,0,13.71682612101237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,fp8,0,2.2326186498006186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,128,1,fp8,fp8,0,2.019440015157064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,float16,0,2.2491040229797363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,float16,0,13.749552408854166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,fp8,0,2.2635040283203125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,0,1,fp8,fp8,0,12.518987019856771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,128,1,fp8,fp8,0,2.0570027033487954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,fp8,0,13.74630355834961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,float16,0,13.804805755615234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,float16,0,1.2875413099924724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,fp8,0,1.3126506805419922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,128,1,fp8,fp8,0,1.2154346307118733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,float16,0,7.143215815226237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,0,1,fp8,fp8,0,12.58075205485026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,fp8,0,13.838848114013672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,float16,0,1.1402453581492107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,fp8,0,1.1500266393025715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,fp8,0,7.157253265380859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,0,1,fp8,fp8,0,6.516063690185547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,128,1,fp8,fp8,0,1.0417493184407551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,float16,0,6.969178517659505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,float16,0,1.1505066553751628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,fp8,0,1.1587306658426921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,0,1,fp8,fp8,0,6.329354604085286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,fp8,0,6.96725336710612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,128,1,fp8,fp8,0,1.0500106811523438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,float16,0,1.162021319071452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,float16,0,6.980773289998372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,fp8,0,1.1751573085784912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,128,1,fp8,fp8,0,1.0699679851531982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,0,1,fp8,fp8,0,6.348096211751302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,fp8,0,6.970991770426433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,float16,0,0.7051466306050619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,float16,0,6.992144266764323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,fp8,0,0.7182986736297607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,128,1,fp8,fp8,0,0.6743893623352051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,float16,0,3.680528004964193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,0,1,fp8,fp8,0,6.365898768107097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,fp8,0,7.016159693400065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,float16,0,0.6377173264821371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,fp8,0,0.6419519980748495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,0,1,fp8,fp8,0,3.367685317993164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,fp8,0,3.702357292175293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,128,1,fp8,fp8,0,0.5878026485443115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,float16,0,3.6068639755249023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,float16,0,0.6421706676483154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,fp8,0,0.6463413238525391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,0,1,fp8,fp8,0,3.289605458577474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,fp8,0,3.5994132359822593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,128,1,fp8,fp8,0,0.5917386611302694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,float16,0,0.6476053396860758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,0,1,fp8,fp8,0,3.2876160939534507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,fp8,0,3.610981305440267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,fp8,0,0.6545653343200684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,128,1,fp8,fp8,0,0.6005599896113077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,float16,0,3.6178080240885415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,float16,0,0.49403198560078937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,float16,0,3.6106561024983725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,fp8,0,3.618021329243978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,float16,0,2.040986696879069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,fp8,0,0.492789347966512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,128,1,fp8,fp8,0,0.4630826711654663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,float16,0,0.4925440152486165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,fp8,0,2.040895938873291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,0,1,fp8,fp8,0,1.8615093231201172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,0,1,fp8,fp8,0,3.301370620727539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,fp8,0,0.492853323618571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,128,1,fp8,fp8,0,0.4615360101064046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,float16,0,2.025200049082438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,float16,0,0.4944640000661214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,fp8,0,2.0314559936523438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,fp8,0,0.4986720085144043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,128,1,fp8,fp8,0,0.46110932032267254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,fp8,0,2.029834588368734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,0,1,fp8,fp8,0,1.8621919949849446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,0,1,fp8,fp8,0,1.8579893112182617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,float16,0,0.4931733210881551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,float16,0,2.027754624684652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,128,1,fp8,fp8,0,0.4618826707204183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,float16,0,2.0393706957499185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,fp8,0,2.030837376912435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,float16,0,1.6309866905212402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,fp8,0,0.4986826578776042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,fp8,0,1.6448319753011067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,0,1,fp8,fp8,0,1.8616746266682942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,128,1,fp8,fp8,0,1.4808586438496907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,float16,0,8.057008107503256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,float16,0,1.6407039960225422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,fp8,0,1.6546400388081868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,0,1,fp8,fp8,0,7.3457387288411455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,128,1,fp8,fp8,0,1.49453337987264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,fp8,0,8.079946517944336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,float16,0,8.073914845784506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,float16,0,1.664394696553548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,fp8,0,1.6822293599446614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,fp8,0,8.110074361165365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,0,1,fp8,fp8,0,7.351909637451172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,128,1,fp8,fp8,0,1.5268479983011882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,float16,0,0.9647839864095052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,float16,0,8.107834498087565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,fp8,0,0.9840853214263916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,float16,0,4.248837471008301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,128,1,fp8,fp8,0,0.9117013613382975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,0,1,fp8,fp8,0,7.388746897379558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,float16,0,0.8579359849294027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,fp8,0,8.146074930826822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,fp8,0,4.269093195597331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,0,1,fp8,fp8,0,3.8773279190063477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,fp8,0,0.8663573265075684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,128,1,fp8,fp8,0,0.7838826974232992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,float16,0,4.115461349487305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,float16,0,0.8662827014923096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,fp8,0,0.873141368230184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,fp8,0,4.121498743693034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,0,1,fp8,fp8,0,3.7540054321289062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,128,1,fp8,fp8,0,0.7928160031636556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,float16,0,4.123824119567871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,fp8,0,4.131493250528972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,float16,0,0.8752960364023844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,fp8,0,0.8852960268656412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,0,1,fp8,fp8,0,3.7610559463500977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,128,1,fp8,fp8,0,0.8048746585845947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,float16,0,4.137253443400065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,float16,0,0.5314240058263143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,fp8,0,0.544106682141622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,128,1,fp8,fp8,0,0.5112693309783936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,float16,0,2.2133919397989907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,0,1,fp8,fp8,0,3.7754185994466147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,float16,0,0.48399468262990314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,fp8,0,2.226245403289795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,fp8,0,0.4880373477935791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,float16,0,2.16320530573527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,128,1,fp8,fp8,0,0.44993066787719727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,0,1,fp8,fp8,0,2.0302507082621255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,fp8,0,2.171999931335449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,0,1,fp8,fp8,0,1.9656799634297688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,float16,0,0.4866773287455241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,fp8,0,4.153957366943359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,fp8,0,0.4910240173339844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,128,1,fp8,fp8,0,0.4495520194371541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,float16,0,0.4930186669031779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,fp8,0,2.1579626401265464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,fp8,0,0.4968586762746175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,float16,0,2.1635093688964844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,128,1,fp8,fp8,0,0.4577440023422241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,float16,0,2.1644959449768066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,float16,0,0.3744800090789795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,0,1,fp8,fp8,0,1.9678826332092285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,fp8,0,2.171786626180013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,0,1,fp8,fp8,0,1.9780534108479817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,128,1,fp8,fp8,0,0.3527466853459676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,float16,0,1.2595679759979248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,float16,0,0.3750293254852295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,0,1,fp8,fp8,0,1.1484479904174805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,fp8,0,0.3741333484649658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,float16,0,1.2526400089263916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,fp8,0,0.3757280111312866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,fp8,0,1.25273601214091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,fp8,0,1.25874129931132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,float16,0,0.37410132090250653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,fp8,0,0.37438400586446124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,float16,0,1.251962661743164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,128,1,fp8,fp8,0,0.3500479857126872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,fp8,0,1.2523732980092366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,0,1,fp8,fp8,0,1.1477440198262532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,float16,0,0.37462401390075684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,fp8,0,0.37436266740163165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,float16,0,1.251850684483846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,128,1,fp8,fp8,0,0.3509813149770101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,128,1,fp8,fp8,0,0.3511893351872762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,fp8,0,1.2524267037709553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,0,1,fp8,fp8,0,1.148213307062785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,float16,0,1.3548800150553386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,fp8,0,1.3667252858479817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,128,1,fp8,fp8,0,1.2298186620076497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,0,1,fp8,fp8,0,1.1472373008728027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,float16,0,1.361669381459554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,float16,0,5.79043706258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,fp8,0,1.377343972524007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,fp8,0,5.810778935750325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,128,1,fp8,fp8,0,1.2416640122731526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,0,1,fp8,fp8,0,5.276336034138997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,float16,0,5.812421162923177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,float16,0,1.3814187049865723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,fp8,0,5.826613108317058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,0,1,fp8,fp8,0,5.278085390726726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,128,1,fp8,fp8,0,1.2648693720499675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,float16,0,5.839653650919597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,fp8,0,1.3989866574605305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,float16,0,0.80404265721639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,fp8,0,0.823082685470581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,fp8,0,5.845765431722005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,0,1,fp8,fp8,0,5.311258633931478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,float16,0,3.0810934702555337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,float16,0,0.7188586393992106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,0,1,fp8,fp8,0,2.81716251373291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,128,1,fp8,fp8,0,0.7616159915924072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,fp8,0,0.725658655166626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,128,1,fp8,fp8,0,0.6566453377405802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,float16,0,2.974069277445475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,float16,0,0.7243946393330892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,fp8,0,3.0930986404418945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,fp8,0,2.980682690938314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,fp8,0,0.7322453657786051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,128,1,fp8,fp8,0,0.6618933280309042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,float16,0,2.982741355895996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,float16,0,0.7323040167490641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,0,1,fp8,fp8,0,2.71125857035319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,0,1,fp8,fp8,0,2.7147413889567056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,fp8,0,0.7405386765797933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,128,1,fp8,fp8,0,0.6724960009256998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,float16,0,2.995237350463867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,fp8,0,2.9844961166381836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,fp8,0,3.005359967549642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,0,1,fp8,fp8,0,2.726597468058268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,fp8,0,0.45791467030843097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,float16,0,1.6160532633463542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,float16,0,0.44704000155131024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,fp8,0,1.6292053858439128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,float16,0,0.40507733821868896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,128,1,fp8,fp8,0,0.4286666711171468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,fp8,0,0.4101066589355469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,float16,0,1.5664000511169434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,128,1,fp8,fp8,0,0.3763306538263957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,fp8,0,1.5699572563171387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,0,1,fp8,fp8,0,1.4868532816569011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,float16,0,0.4092586835225423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,128,1,fp8,fp8,0,0.37878398100535077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,float16,0,1.5670132637023926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,fp8,0,1.576581319173177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,0,1,fp8,fp8,0,1.432048002878825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,0,1,fp8,fp8,0,1.4372587203979492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,float16,0,0.41441067059834796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,fp8,0,0.41858665148417157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,fp8,0,0.4140640099843343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,float16,0,1.5747839609781902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,float16,0,0.3163093328475952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,fp8,0,1.5824799537658691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,0,1,fp8,fp8,0,1.4406986236572266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,fp8,0,0.3157599965731303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,128,1,fp8,fp8,0,0.29796799023946124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,128,1,fp8,fp8,0,0.38466131687164307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,0,1,fp8,fp8,0,0.8568639755249023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,float16,0,0.31437333424886066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,fp8,0,0.3141439954439799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,float16,0,0.9285386403401693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,128,1,fp8,fp8,0,0.296015997727712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,float16,0,0.9350346724192301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,fp8,0,0.9287839730580648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,float16,0,0.3168213367462158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,fp8,0,0.9338719844818115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,fp8,0,0.3163573344548543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,128,1,fp8,fp8,0,0.2959199945131938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,fp8,0,0.9299466609954834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,0,1,fp8,fp8,0,0.8552746772766113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,float16,0,0.31631465752919513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,float16,0,0.9315626621246338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,128,1,fp8,fp8,0,0.296341339747111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,float16,0,0.9289173285166422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,fp8,0,0.9305439790089926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,0,1,fp8,fp8,0,0.8543252944946289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,fp8,0,0.31570667028427124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,float16,0,2.1421920458475747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,fp8,0,2.156218687693278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,128,1,fp8,fp8,0,1.9430826505025227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,0,1,fp8,fp8,0,0.8553173542022705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,float16,0,2.154085318247477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,float16,0,7.7120107014973955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,fp8,0,2.171450614929199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,fp8,0,7.740282694498698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,128,1,fp8,fp8,0,1.9628052711486816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,0,1,fp8,fp8,0,7.017375946044922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,float16,0,2.194826602935791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,float16,0,7.766053517659505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,fp8,0,7.760821024576823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,0,1,fp8,fp8,0,7.024944305419922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,fp8,0,2.2105654080708823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,128,1,fp8,fp8,0,2.0029385884602866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,float16,0,1.2315093676249187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,float16,0,7.808245340983073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,fp8,0,1.2553119659423828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,fp8,0,7.802640279134114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,0,1,fp8,fp8,0,7.081535975138347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,128,1,fp8,fp8,0,1.1570186614990234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,float16,0,1.085093339284261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,float16,0,4.077957471211751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,fp8,0,4.093925476074219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,0,1,fp8,fp8,0,3.7253545125325522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,fp8,0,1.095082680384318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,128,1,fp8,fp8,0,0.9835626284281412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,float16,0,3.9077491760253906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,float16,0,1.093295971552531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,fp8,0,1.1023680369059246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,0,1,fp8,fp8,0,3.546757380167643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,128,1,fp8,fp8,0,0.9945440292358398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,float16,0,3.921658515930176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,fp8,0,3.9132906595865884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,fp8,0,3.925093332926432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,0,1,fp8,fp8,0,3.5511147181193032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,fp8,0,1.11844269434611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,128,1,fp8,fp8,0,1.0120000044504802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,float16,0,1.1084799766540527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,float16,0,3.9358933766682944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,float16,0,0.6477599938710531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,fp8,0,0.6608533461888632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,fp8,0,3.94708251953125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,float16,0,2.098405361175537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,fp8,0,2.1110827128092446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,0,1,fp8,fp8,0,1.9200107256571453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,float16,0,0.5771413246790568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,0,1,fp8,fp8,0,3.5687360763549805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,fp8,0,0.5821546713511149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,128,1,fp8,fp8,0,0.5318026542663574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,fp8,0,2.018650690714518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,0,1,fp8,fp8,0,1.8328906695048015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,128,1,fp8,fp8,0,0.6134399970372518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,float16,0,0.5805600086847941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,fp8,0,0.5855679909388224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,float16,0,2.0170507431030273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,fp8,0,2.0227039655049643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,float16,0,2.0128480593363443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,128,1,fp8,fp8,0,0.5336639881134033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,float16,0,0.5866613388061523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,0,1,fp8,fp8,0,1.83623472849528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,128,1,fp8,fp8,0,0.541754682858785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,float16,0,2.03163735071818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,float16,0,0.3593546549479167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,0,1,fp8,fp8,0,1.8470239639282227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,fp8,0,0.5938026507695516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,fp8,0,0.369488000869751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,128,1,fp8,fp8,0,0.34552001953125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,fp8,0,1.1206666628519695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,float16,0,0.32497066259384155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,fp8,0,2.0334933598836265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,float16,0,1.1123733520507812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,fp8,0,0.3269546627998352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,128,1,fp8,fp8,0,0.30269867181777954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,fp8,0,1.0722133318583171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,0,1,fp8,fp8,0,1.0242453416188557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,float16,0,0.32843200365702313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,fp8,0,0.33086933692296344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,float16,0,1.072208007176717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,128,1,fp8,fp8,0,0.3059306740760803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,fp8,0,1.0762346585591633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,0,1,fp8,fp8,0,0.9828426837921143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,float16,0,0.33083732922871906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,fp8,0,0.33477866649627686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,float16,0,1.0792853037516277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,128,1,fp8,fp8,0,0.31031467517217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,float16,0,1.0695679982503254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,fp8,0,1.0844213167826335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,0,1,fp8,fp8,0,0.990015983581543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,0,1,fp8,fp8,0,0.9793492952982584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,float16,0,0.6596373319625854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,128,1,fp8,fp8,0,0.24328533808390299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,fp8,0,0.6587573289871216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,0,1,fp8,fp8,0,0.6059093475341797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,float16,0,0.2547253370285034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,fp8,0,0.25482134024302167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,float16,0,0.2568746606508891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,128,1,fp8,fp8,0,0.24008532365163168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,fp8,0,0.2568160096804301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,fp8,0,0.6530613501866659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,0,1,fp8,fp8,0,0.6029280026753744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,float16,0,0.25382934014002484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,fp8,0,0.25674132506052655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,float16,0,0.6535893281300863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,fp8,0,0.6558560132980347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,float16,0,0.6543146769205729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,float16,0,0.25704000393549603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,fp8,0,0.25592533747355145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,128,1,fp8,fp8,0,0.24073066314061484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,128,1,fp8,fp8,0,0.23862934112548828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,fp8,0,0.6556746562321981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,0,1,fp8,fp8,0,0.6028159856796265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,float16,0,1.5853813489278157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,fp8,0,1.5992639859517415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,0,1,fp8,fp8,0,0.6025813420613607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,float16,0,0.6565173467000326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,128,1,fp8,fp8,0,1.4386132558186848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,float16,0,1.5956427256266277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,float16,0,4.662517229715983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,0,1,fp8,fp8,0,4.226463953653972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,fp8,0,1.6097280184427898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,128,1,fp8,fp8,0,1.4498666127522786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,float16,0,4.66926924387614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,float16,0,1.6222933133443196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,fp8,0,4.690458615620931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,fp8,0,4.687882741292317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,fp8,0,1.6354506810506184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,float16,0,4.703616142272949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,0,1,fp8,fp8,0,4.240330696105957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,float16,0,0.9247786998748779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,128,1,fp8,fp8,0,1.4797013600667317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,fp8,0,0.9416320323944092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,float16,0,2.5005760192871094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,128,1,fp8,fp8,0,0.869317372639974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,fp8,0,4.723845481872559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,fp8,0,2.516581376393636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,0,1,fp8,fp8,0,4.267104148864746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,float16,0,0.8169973691304525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,fp8,0,0.8244000275929769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,float16,0,2.374687989552816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,0,1,fp8,fp8,0,2.2857866287231445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,fp8,0,2.3772853215535483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,0,1,fp8,fp8,0,2.155610720316569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,128,1,fp8,fp8,0,0.7432479858398438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,fp8,0,0.8321173191070557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,128,1,fp8,fp8,0,0.748469352722168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,float16,0,2.3801120122273765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,float16,0,0.8219520250956217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,float16,0,0.8332266807556152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,fp8,0,2.3936427434285483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,0,1,fp8,fp8,0,2.159978707631429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,fp8,0,0.8426453272501627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,128,1,fp8,fp8,0,0.7641440232594808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,float16,0,0.4917440017064412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,fp8,0,2.404773394266764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,0,1,fp8,fp8,0,2.1734773317972818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,fp8,0,0.5015679995218912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,float16,0,1.2952160040537517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,128,1,fp8,fp8,0,0.4644213517506917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,float16,0,2.4000906944274902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,fp8,0,1.306399981180827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,float16,0,0.43697067101796466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,0,1,fp8,fp8,0,1.1895626386006672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,fp8,0,0.44037334124247235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,128,1,fp8,fp8,0,0.4067893425623576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,float16,0,1.236074686050415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,float16,0,0.4421173334121704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,0,1,fp8,fp8,0,1.126581350962321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,fp8,0,0.44443198045094806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,128,1,fp8,fp8,0,0.40676267941792804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,fp8,0,1.2431360085805256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,0,1,fp8,fp8,0,1.1270826657613118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,float16,0,0.4495786825815837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,fp8,0,0.45264001687367755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,float16,0,1.2477760314941406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,128,1,fp8,fp8,0,0.41306134064992267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,fp8,0,1.253434658050537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,0,1,fp8,fp8,0,1.137221336364746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,float16,0,0.27614933252334595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,float16,0,0.6973653634389242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,128,1,fp8,fp8,0,0.26444266239802044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,float16,0,1.2399946848551433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,fp8,0,0.7051946322123209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,0,1,fp8,fp8,0,0.6455146471659342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,float16,0,0.2456266681353251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,fp8,0,0.24866666396458945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,float16,0,0.6653013229370117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,128,1,fp8,fp8,0,0.23241066932678223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,fp8,0,0.667136033376058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,0,1,fp8,fp8,0,0.613103985786438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,float16,0,0.24891199668248495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,fp8,0,0.2500320076942444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,float16,0,0.6689226627349854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,128,1,fp8,fp8,0,0.234224001566569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,fp8,0,1.2387680212656658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,0,1,fp8,fp8,0,0.6143893400828043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,fp8,0,0.28203733762105304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,fp8,0,0.2553600072860718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,float16,0,0.6736160119374593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,128,1,fp8,fp8,0,0.23834667603174844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,fp8,0,0.67685333887736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,0,1,fp8,fp8,0,0.6178346474965414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,float16,0,0.19987199703852335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,fp8,0,0.668842633565267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,128,1,fp8,fp8,0,0.1863306760787964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,fp8,0,0.42904531955718994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,0,1,fp8,fp8,0,0.39430399735768634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,float16,0,0.1946400006612142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,float16,0,0.42291732629140216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,fp8,0,0.19953600565592447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,fp8,0,0.19542400042215982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,float16,0,0.2520586649576823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,fp8,0,0.42289066314697266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,0,1,fp8,fp8,0,0.3901066780090332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,float16,0,0.19578667481740317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,float16,0,0.42316265900929767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,fp8,0,0.1960266629854838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,128,1,fp8,fp8,0,0.18361065785090128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,fp8,0,0.4238293170928955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,0,1,fp8,fp8,0,0.3898719946543376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,float16,0,0.4273546536763509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,128,1,fp8,fp8,0,0.18528532981872559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,128,1,fp8,fp8,0,0.18542933464050293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,fp8,0,0.42532801628112793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,0,1,fp8,fp8,0,0.3919573227564494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,float16,0,0.19664533933003744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,fp8,0,0.19664533933003744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,float16,0,2.110367933909098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,fp8,0,2.1259306271870932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,128,1,fp8,fp8,0,1.9086292584737141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,float16,0,0.42362133661905926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,float16,0,4.684122721354167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,float16,0,2.125610669453939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,fp8,0,4.708005269368489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,0,1,fp8,fp8,0,4.243295987447103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,fp8,0,2.142319997151693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,float16,0,4.70413335164388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,128,1,fp8,fp8,0,1.929957389831543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,float16,0,2.1621012687683105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,fp8,0,4.7260745366414385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,0,1,fp8,fp8,0,4.267855962117513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,fp8,0,2.177999973297119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,float16,0,4.75655460357666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,128,1,fp8,fp8,0,1.9695413907368977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,float16,0,1.2046720186869304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,fp8,0,1.2232426802317302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,fp8,0,4.7752641042073565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,0,1,fp8,fp8,0,4.304277420043945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,128,1,fp8,fp8,0,1.127247969309489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,float16,0,1.0576480229695637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,float16,0,2.5237706502278647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,0,1,fp8,fp8,0,2.3080533345540366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,fp8,0,1.0652960141499836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,float16,0,2.3613120714823403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,fp8,0,2.542154630025228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,fp8,0,2.369445323944092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,128,1,fp8,fp8,0,0.9547573725382487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,0,1,fp8,fp8,0,2.13645871480306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,fp8,0,1.0743733247121174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,float16,0,2.364448070526123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,128,1,fp8,fp8,0,0.9644213517506918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,float16,0,1.0642346541086833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,fp8,0,2.3758187294006348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,float16,0,1.0781653722127278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,0,1,fp8,fp8,0,2.1445493698120117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,float16,0,2.3850773175557456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,128,1,fp8,fp8,0,0.9814186890920004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,float16,0,0.6208693186442057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,fp8,0,2.4001866976420083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,0,1,fp8,fp8,0,2.160271962483724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,fp8,0,0.6320480108261108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,128,1,fp8,fp8,0,0.5847200155258179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,fp8,0,1.0908532937367756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,fp8,0,1.3055573304494221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,float16,0,0.5497920115788778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,0,1,fp8,fp8,0,1.1876853307088215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,fp8,0,0.5558826526006063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,128,1,fp8,fp8,0,0.501141349474589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,float16,0,1.2936480045318604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,fp8,0,1.2179306348164876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,float16,0,0.5534773270289103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,fp8,0,0.5589066743850708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,float16,0,1.2161866823832195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,128,1,fp8,fp8,0,0.5055893262227377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,float16,0,1.215882698694865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,fp8,0,1.2220053672790527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,float16,0,0.5611999829610189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,0,1,fp8,fp8,0,1.1021440029144287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,fp8,0,0.5682186683019003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,float16,0,1.2257866859436035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,fp8,0,1.2324053446451824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,0,1,fp8,fp8,0,1.1162453492482503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,float16,0,0.33088000615437824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,0,1,fp8,fp8,0,1.1079999605814617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,fp8,0,0.33924798170725506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,128,1,fp8,fp8,0,0.3161120017369588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,fp8,0,0.6881173451741537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,0,1,fp8,fp8,0,0.6300640106201172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,float16,0,0.2922719915707906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,fp8,0,0.29582399129867554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,float16,0,0.6399573485056559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,128,1,fp8,fp8,0,0.2773653268814087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,fp8,0,0.6427520116170248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,0,1,fp8,fp8,0,0.5859040021896362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,float16,0,0.296122670173645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,128,1,fp8,fp8,0,0.5143839915593466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,float16,0,0.643882671991984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,128,1,fp8,fp8,0,0.2752853234608968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,fp8,0,0.6474506855010986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,0,1,fp8,fp8,0,0.5889706611633301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,float16,0,0.30157333612442017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,float16,0,0.6813333034515381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,float16,0,0.6498506863911947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,128,1,fp8,fp8,0,0.28175467252731323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,fp8,0,0.6525653203328451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,0,1,fp8,fp8,0,0.5947360197703043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,float16,0,0.3768746852874756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,fp8,0,0.1950613260269165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,128,1,fp8,fp8,0,0.18504534165064493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,fp8,0,0.30472532908121747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,fp8,0,0.38255465030670166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,0,1,fp8,fp8,0,0.35142401854197186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,float16,0,0.16888533035914102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,float16,0,0.35278932253519696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,fp8,0,0.1689066688219706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,128,1,fp8,fp8,0,0.15659200151761374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,fp8,0,0.3546080191930135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,0,1,fp8,fp8,0,0.32442132631937665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,float16,0,0.1707786719004313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,float16,0,0.353765328725179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,128,1,fp8,fp8,0,0.16063466668128967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,fp8,0,0.3557066520055135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,fp8,0,0.2983679970105489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,0,1,fp8,fp8,0,0.3343520164489746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,float16,0,0.3568586508433024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,fp8,0,0.17203734318415323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,fp8,0,0.17246399323145548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,fp8,0,0.35901331901550293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,0,1,fp8,fp8,0,0.3330079913139343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,float16,0,0.18973867098490396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,float16,0,0.24429333209991455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,fp8,0,0.13804800311724344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,128,1,fp8,fp8,0,0.1311199963092804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,128,1,fp8,fp8,0,0.16454933087031046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,fp8,0,0.24421866734822592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,0,1,fp8,fp8,0,0.2260106603304545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,float16,0,0.13598933815956116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,float16,0,0.24042133490244547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,fp8,0,0.13581333557764688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,128,1,fp8,fp8,0,0.1274133324623108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,float16,0,0.1369653344154358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,0,1,fp8,fp8,0,0.222053329149882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,float16,0,0.13618133465449014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,float16,0,0.24060799678166708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,float16,0,0.17225066820780435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,128,1,fp8,fp8,0,0.12853866815567017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,fp8,0,0.13613866766293845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,fp8,0,0.24080532789230347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,0,1,fp8,fp8,0,0.2241226633389791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,float16,0,0.24184532960255942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,fp8,0,0.13593066732088724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,128,1,fp8,fp8,0,0.12798399726549783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,fp8,0,0.24047466119130453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,fp8,0,0.24227199951807657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,0,1,fp8,fp8,0,0.22245333592096964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,float16,0,0.13607999682426453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,float16,0,1.5645653406778972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,fp8,0,1.5753067334493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,128,1,fp8,fp8,0,1.4129707018534343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,float16,0,2.935349464416504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,float16,0,1.57534392674764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,0,1,fp8,fp8,0,2.6583733558654785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,float16,0,2.9493494033813477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,fp8,0,1.5888373057047527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,128,1,fp8,fp8,0,1.4281867345174153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,fp8,0,2.94706662495931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,float16,0,1.6046454111735027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,0,1,fp8,fp8,0,2.6698080698649087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,fp8,0,1.6162400245666504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,fp8,0,2.961045265197754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,fp8,0,2.996159871419271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,128,1,fp8,fp8,0,1.4581599235534668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,float16,0,0.9043893019358317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,0,1,fp8,fp8,0,2.701221466064453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,float16,0,1.6097119649251301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,fp8,0,0.9184906482696533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,float16,0,2.978992144266764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,fp8,0,1.6268374125162761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,0,1,fp8,fp8,0,1.4792373975118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,float16,0,0.7949546972910563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,fp8,0,0.803717295328776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,float16,0,1.4929547309875488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,128,1,fp8,fp8,0,0.8449333508809408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,fp8,0,1.499791940053304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,0,1,fp8,fp8,0,1.3509227434794109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,fp8,0,0.8107199668884277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,float16,0,1.4995360374450684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,128,1,fp8,fp8,0,0.7204106648763021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,128,1,fp8,fp8,0,0.7278186480204264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,float16,0,0.8023040294647217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,float16,0,0.8130773703257242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,0,1,fp8,fp8,0,1.356869379679362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,fp8,0,0.8217439651489258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,float16,0,1.5092533429463704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,fp8,0,1.5190827051798503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,0,1,fp8,fp8,0,1.3705066045125325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,float16,0,0.47113601366678876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,float16,0,0.8310133616129557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,fp8,0,0.4810933272043864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,128,1,fp8,fp8,0,0.4431999921798706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,fp8,0,0.841317335764567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,0,1,fp8,fp8,0,0.7672639687856039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,float16,0,0.4163893461227417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,fp8,0,1.508255958557129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,fp8,0,0.4190773169199626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,128,1,fp8,fp8,0,0.742527961730957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,fp8,0,0.7958347002665201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,0,1,fp8,fp8,0,0.7054186662038168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,float16,0,0.7777226765950521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,float16,0,0.7726720174153646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,fp8,0,0.4243733485539754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,128,1,fp8,fp8,0,0.3855146567026774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,fp8,0,0.7809279759724935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,0,1,fp8,fp8,0,0.708784023920695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,float16,0,0.42628800868988037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,float16,0,0.7843519846598307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,128,1,fp8,fp8,0,0.38176532586415607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,fp8,0,0.7889653046925863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,float16,0,0.41999467213948566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,0,1,fp8,fp8,0,0.7148106892903646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,float16,0,0.25403199593226117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,fp8,0,0.43122665087382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,128,1,fp8,fp8,0,0.24278932809829712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,fp8,0,0.45024534066518146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,0,1,fp8,fp8,0,0.4134666522343953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,float16,0,0.22217599550882974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,float16,0,0.4108479817708333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,fp8,0,0.2243786652882894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,128,1,fp8,fp8,0,0.2104853391647339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,fp8,0,0.4124213457107544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,0,1,fp8,fp8,0,0.3798346519470215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,float16,0,0.22445333003997803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,float16,0,0.4437493483225505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,float16,0,0.41281068325042725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,128,1,fp8,fp8,0,0.21209599574406943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,fp8,0,0.4164373477300008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,0,1,fp8,fp8,0,0.3833706776301066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,float16,0,0.2302773396174113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,fp8,0,0.2591200073560079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,float16,0,0.4192266861597697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,fp8,0,0.2262453238169352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,128,1,fp8,fp8,0,0.21566933393478394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,fp8,0,0.4204373359680176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,0,1,fp8,fp8,0,0.3856853246688843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,128,1,fp8,fp8,0,0.3914080063501994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,float16,0,0.2500213384628296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,fp8,0,0.14923733472824097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,float16,0,0.14656000336011252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,128,1,fp8,fp8,0,0.14205867052078247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,fp8,0,0.25277332464853924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,0,1,fp8,fp8,0,0.23531200488408408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,float16,0,0.12986133495966592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,float16,0,0.23457066218058267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,fp8,0,0.13013333082199097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,128,1,fp8,fp8,0,0.11956266562143962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,fp8,0,0.23218133052190146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,0,1,fp8,fp8,0,0.21184533834457397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,float16,0,0.130213330189387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,float16,0,0.23471999168395996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,128,1,fp8,fp8,0,0.1209493378798167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,fp8,0,0.23558932542800903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,0,1,fp8,fp8,0,0.2151040037473043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,float16,0,0.1316373348236084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,float16,0,0.23520533243815103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,fp8,0,0.1322986682256063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,128,1,fp8,fp8,0,0.12390933434168498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,fp8,0,0.2344213326772054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,0,1,fp8,fp8,0,0.21783999601999918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,float16,0,0.10708266496658325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,float16,0,0.16716800133387247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,fp8,0,0.10524266958236694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,128,1,fp8,fp8,0,0.10132267077763875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,fp8,0,0.16514133413632712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,0,1,fp8,fp8,0,0.15641066431999207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,float16,0,0.10552000006039937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,float16,0,0.16694400707880655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,fp8,0,0.10584533214569092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,128,1,fp8,fp8,0,0.10147733489672343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,fp8,0,0.16555733482042947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,0,1,fp8,fp8,0,0.1545973320802053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,float16,0,0.10547199845314026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,float16,0,0.16662933429082236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,fp8,0,0.10542399684588115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,128,1,fp8,fp8,0,0.10038399696350098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,fp8,0,0.1665226618448893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,0,1,fp8,fp8,0,0.15473600228627524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,float16,0,0.10572266578674316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,float16,0,0.16665066281954447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,128,1,fp8,fp8,0,0.10107733805974324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,fp8,0,0.16703466574350992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,0,1,fp8,fp8,0,0.15435733397801718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,fp8,0,0.23491734266281128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,fp8,0,0.13078932960828146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,fp8,0,0.10602666934331258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,float16,0,2.1045653025309243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,float16,0,3.1716960271199546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,128,1,fp8,fp8,0,1.8722666104634602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,fp8,0,3.1817973454793296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,float16,0,2.1353012720743814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,fp8,0,2.1099252700805664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,fp8,0,2.132570743560791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,128,1,fp8,fp8,0,1.8929333686828613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,0,1,fp8,fp8,0,2.8375253677368164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,0,1,fp8,fp8,0,2.86356258392334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,fp8,0,3.212789217631022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,fp8,0,2.171109358469645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,float16,0,3.238522529602051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,float16,0,2.156794706980387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,128,1,fp8,fp8,0,1.940970738728841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,float16,0,1.1946453253428142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,0,1,fp8,fp8,0,2.907813390096029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,fp8,0,3.2476425170898438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,fp8,0,1.205631971359253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,float16,0,1.7474239667256672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,fp8,0,1.7589492797851562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,128,1,fp8,fp8,0,1.112389326095581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,float16,0,1.583562692006429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,fp8,0,1.0651466846466064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,128,1,fp8,fp8,0,0.9399946530659994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,0,1,fp8,fp8,0,1.6072319348653157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,fp8,0,1.5959733327229817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,float16,0,1.0429973602294922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,0,1,fp8,fp8,0,1.428069273630778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,float16,0,1.052064021428426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,fp8,0,1.0599626700083415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,float16,0,3.210576057434082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,128,1,fp8,fp8,0,0.9492053190867106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,fp8,0,1.605514685312907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,float16,0,1.5914613405863445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,float16,0,1.6174880663553874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,fp8,0,1.0780373414357503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,128,1,fp8,fp8,0,0.9706719716389974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,float16,0,1.0719573497772217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,float16,0,0.609333316485087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,fp8,0,1.622879981994629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,float16,0,0.8906293710072836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,fp8,0,0.6183733145395914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,128,1,fp8,fp8,0,0.571178674697876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,0,1,fp8,fp8,0,1.4389653205871582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,fp8,0,0.9008639653523763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,0,1,fp8,fp8,0,0.8218080202738444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,float16,0,0.5352906783421835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,float16,0,0.8119893074035645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,fp8,0,0.53985067208608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,fp8,0,0.8161333401997884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,0,1,fp8,fp8,0,0.7364266713460287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,float16,0,0.5407520135243734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,0,1,fp8,fp8,0,1.461557388305664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,128,1,fp8,fp8,0,0.4859679937362671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,float16,0,0.8176106611887614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,fp8,0,0.5442560116449991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,128,1,fp8,fp8,0,0.49033065636952716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,fp8,0,0.822538693745931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,float16,0,0.8256800174713135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,fp8,0,0.555189331372579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,128,1,fp8,fp8,0,0.5013706684112549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,fp8,0,0.8321599960327148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,0,1,fp8,fp8,0,0.7405760288238525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,float16,0,0.31958399216334027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,0,1,fp8,fp8,0,0.7507733503977457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,float16,0,0.4637226661046346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,fp8,0,0.3266879916191101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,128,1,fp8,fp8,0,0.3031466603279114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,fp8,0,0.4731359879175822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,0,1,fp8,fp8,0,0.43325865268707275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,float16,0,0.42367998758951825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,128,1,fp8,fp8,0,0.25880000988642377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,float16,0,0.5482026735941569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,fp8,0,0.42742931842803955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,0,1,fp8,fp8,0,0.38978668053944904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,float16,0,0.4276426633199056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,float16,0,0.2797813415527344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,fp8,0,0.2858453392982483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,fp8,0,0.28171734015146893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,fp8,0,0.4306933482487996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,0,1,fp8,fp8,0,0.39239998658498126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,float16,0,0.28970134258270264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,float16,0,0.4338773488998413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,fp8,0,0.2919093370437622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,128,1,fp8,fp8,0,0.26765332619349164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,fp8,0,0.43805332978566486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,0,1,fp8,fp8,0,0.39825065930684406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,float16,0,0.17525333166122437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,float16,0,0.25361067056655884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,128,1,fp8,fp8,0,0.26365333795547485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,128,1,fp8,fp8,0,0.16860800981521606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,fp8,0,0.2579573392868042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,0,1,fp8,fp8,0,0.24627200762430826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,float16,0,0.15042133132616678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,float16,0,0.22819199164708456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,fp8,0,0.1523146629333496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,128,1,fp8,fp8,0,0.14313066999117532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,fp8,0,0.22978132963180542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,0,1,fp8,fp8,0,0.21281067530314127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,float16,0,0.15272000432014465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,float16,0,0.22842133045196533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,fp8,0,0.15406399965286255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,fp8,0,0.1818186640739441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,fp8,0,0.23056000471115112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,0,1,fp8,fp8,0,0.21438399950663248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,float16,0,0.15466666221618652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,float16,0,0.23243200778961182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,fp8,0,0.15659200151761374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,128,1,fp8,fp8,0,0.15037332971890768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,fp8,0,0.23484800259272257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,0,1,fp8,fp8,0,0.22021865844726562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,float16,0,0.10272000233332317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,float16,0,0.14644799629847208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,fp8,0,0.10408533612887065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,128,1,fp8,fp8,0,0.10263466835021973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,fp8,0,0.14896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,128,1,fp8,fp8,0,0.14512532949447632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,float16,0,0.0930560032526652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,float16,0,0.13661332925160727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,fp8,0,0.09273599584897359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,128,1,fp8,fp8,0,0.08480532964070638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,fp8,0,0.13782399892807007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,0,1,fp8,fp8,0,0.12570666273434958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,float16,0,0.09334400296211243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,float16,0,0.1379093329111735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,fp8,0,0.09521599610646565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,128,1,fp8,fp8,0,0.08507733543713887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,fp8,0,0.13808533549308777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,0,1,fp8,fp8,0,0.1258026659488678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,float16,0,0.09316266576449077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,float16,0,0.13806933164596558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,fp8,0,0.09340266386667888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,128,1,fp8,fp8,0,0.08724266290664673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,fp8,0,0.13915200034777322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,0,1,fp8,fp8,0,0.1268160045146942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,float16,0,0.07712000111738841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,float16,0,0.10429867108662923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,fp8,0,0.07814933359622955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,128,1,fp8,fp8,0,0.07253866891066234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,float16,0,0.2825760046641032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,0,1,fp8,fp8,0,0.09734400113423665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,float16,0,0.07640000184377034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,float16,0,0.10340799887975057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,128,1,fp8,fp8,0,0.07264000177383423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,fp8,0,0.1037066678206126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,0,1,fp8,fp8,0,0.14220266540845236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,0,1,fp8,fp8,0,0.09775466720263164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,float16,0,0.07787733276685078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,float16,0,0.1033066709836324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,fp8,0,0.07667199770609538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,128,1,fp8,fp8,0,0.07259200016657512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,fp8,0,0.10335999727249146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,0,1,fp8,fp8,0,0.09706133604049683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,fp8,0,0.07686399916807811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,float16,0,0.10347732901573181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,fp8,0,0.076773335536321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,128,1,fp8,fp8,0,0.07269333302974701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,fp8,0,0.10335466265678406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,0,1,fp8,fp8,0,0.09729599952697754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,fp8,0,0.10354133447011311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,float16,0,0.07680533329645793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,float16,0,1.5602134068806965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,float16,0,2.0895039240519204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,fp8,0,1.566223939259847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,128,1,fp8,fp8,0,1.395301342010498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,float16,0,1.5736427307128906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,fp8,0,2.0999786059061685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,0,1,fp8,fp8,0,1.8704800605773926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,float16,0,2.107914606730143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,fp8,0,1.5833919843037922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,fp8,0,2.112778663635254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,0,1,fp8,fp8,0,1.8885919253031414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,float16,0,1.600154717763265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,fp8,0,1.607199986775716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,128,1,fp8,fp8,0,1.4134772618611653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,128,1,fp8,fp8,0,1.4407466252644856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,float16,0,0.8972000281016032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,fp8,0,2.141973336537679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,float16,0,1.173514684041341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,float16,0,2.1330080032348633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,fp8,0,0.9103306929270426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,128,1,fp8,fp8,0,0.8352906703948975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,fp8,0,1.182266632715861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,float16,0,0.7903733253479004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,0,1,fp8,fp8,0,1.9138879776000977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,float16,0,1.0520693461100261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,fp8,0,0.7892639636993408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,128,1,fp8,fp8,0,0.7076533635457357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,fp8,0,1.0588373343149822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,float16,0,0.7911626497904459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,0,1,fp8,fp8,0,1.0810453097025554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,float16,0,1.0600639979044597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,fp8,0,0.7993653615315756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,128,1,fp8,fp8,0,0.7177279790242513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,fp8,0,1.0664479732513428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,float16,0,0.8017600377400717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,0,1,fp8,fp8,0,0.9491786956787109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,fp8,0,0.8104159832000732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,128,1,fp8,fp8,0,0.7276159922281901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,fp8,0,1.079909324645996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,0,1,fp8,fp8,0,0.9688213666280111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,float16,0,0.46091731389363605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,float16,0,0.6029706796010336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,fp8,0,0.47468264897664386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,128,1,fp8,fp8,0,0.4331626494725545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,float16,0,1.0705333550771077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,fp8,0,0.6111840009689331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,0,1,fp8,fp8,0,0.558079997698466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,float16,0,0.5421813329060873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,fp8,0,0.4071360031763713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,128,1,fp8,fp8,0,0.3691306511561076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,0,1,fp8,fp8,0,0.9570293426513672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,fp8,0,0.5454026858011881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,0,1,fp8,fp8,0,0.4930560191472371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,float16,0,0.4095093409220378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,float16,0,0.5457706848780314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,fp8,0,0.4134933153788249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,128,1,fp8,fp8,0,0.37346665064493817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,fp8,0,0.5507893164952596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,0,1,fp8,fp8,0,0.49861868222554523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,float16,0,0.4135253429412842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,float16,0,0.5676053365071615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,128,1,fp8,fp8,0,0.37885332107543945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,fp8,0,0.5561546484629313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,0,1,fp8,fp8,0,0.5029813448588053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,float16,0,0.243231991926829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,float16,0,0.3192639946937561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,fp8,0,0.24901332457860312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,128,1,fp8,fp8,0,0.23241066932678223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,fp8,0,0.328549325466156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,0,1,fp8,fp8,0,0.306768000125885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,float16,0,0.20933334032694498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,float16,0,0.2833333412806193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,fp8,0,0.4185119867324829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,128,1,fp8,fp8,0,0.197818656762441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,fp8,0,0.28353599707285565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,0,1,fp8,fp8,0,0.2634720007578532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,float16,0,0.2120479941368103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,float16,0,0.28547199567159015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,128,1,fp8,fp8,0,0.20197333892186484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,fp8,0,0.2962453365325928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,0,1,fp8,fp8,0,0.2672640085220337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,fp8,0,0.2121760050455729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,float16,0,0.2184213399887085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,float16,0,0.2918879985809326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,fp8,0,0.22048000494639078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,128,1,fp8,fp8,0,0.20549333095550537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,fp8,0,0.29423999786376953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,fp8,0,0.21446933348973593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,0,1,fp8,fp8,0,0.27086400985717773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,float16,0,0.1361066699028015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,float16,0,0.17586666345596313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,fp8,0,0.1400266687075297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,128,1,fp8,fp8,0,0.13090667128562927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,fp8,0,0.18044267098108926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,0,1,fp8,fp8,0,0.16698666413625082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,float16,0,0.11600533127784729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,float16,0,0.1552906632423401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,fp8,0,0.11777599652608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,128,1,fp8,fp8,0,0.10744532942771912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,fp8,0,0.15754666924476624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,0,1,fp8,fp8,0,0.14414933323860168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,float16,0,0.1170240044593811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,float16,0,0.15703466534614563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,fp8,0,0.11975466211636861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,128,1,fp8,fp8,0,0.10916800300280254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,fp8,0,0.15902400016784668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,0,1,fp8,fp8,0,0.14417066176732382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,float16,0,0.1181653340657552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,float16,0,0.15937599539756775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,fp8,0,0.12005333105723064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,128,1,fp8,fp8,0,0.11354666948318481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,fp8,0,0.16141866644223532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,0,1,fp8,fp8,0,0.1485973298549652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,float16,0,0.0791733314593633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,float16,0,0.10308800141016643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,fp8,0,0.08125333487987518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,128,1,fp8,fp8,0,0.07866133252779643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,fp8,0,0.10519466797510783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,0,1,fp8,fp8,0,0.09982400139172871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,float16,0,0.07467199862003326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,float16,0,0.09749333063761394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,fp8,0,0.0746666689713796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,fp8,0,0.09882666667302449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,float16,0,0.4026453495025635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,float16,0,0.07483733197053273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,float16,0,0.09882666667302449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,fp8,0,0.07470400134722392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,128,1,fp8,fp8,0,0.06863999863465627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,fp8,0,0.0974720021088918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,0,1,fp8,fp8,0,0.09117333094278972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,float16,0,0.07464533547560374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,float16,0,0.09779199957847595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,fp8,0,0.07518399755160014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,128,1,fp8,fp8,0,0.06890133519967397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,fp8,0,0.09954133629798889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,0,1,fp8,fp8,0,0.09084266424179077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,float16,0,0.06076799829800924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,float16,0,0.07868800063927968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,fp8,0,0.059994667768478394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,128,1,fp8,fp8,0,0.05841066439946493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,fp8,0,0.07858133316040039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,0,1,fp8,fp8,0,0.073253333568573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,float16,0,0.060085331400235496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,float16,0,0.07859200239181519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,fp8,0,0.06039999922116598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,128,1,fp8,fp8,0,0.057904000083605446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,fp8,0,0.07799466451009114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,0,1,fp8,fp8,0,0.07283199826876323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,float16,0,0.05995733539263407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,float16,0,0.07877866427103679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,128,1,fp8,fp8,0,0.05772800246874491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,fp8,0,0.0788213312625885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,128,1,fp8,fp8,0,0.06916266679763794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,0,1,fp8,fp8,0,0.09108266234397888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,float16,0,0.06111466884613037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,float16,0,0.0787306676308314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,fp8,0,0.06054399907588959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,128,1,fp8,fp8,0,0.05787200232346853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,fp8,0,0.07864533364772797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,0,1,fp8,fp8,0,0.0727893312772115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,fp8,0,0.06052266558011373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,0,1,fp8,fp8,0,0.07250666618347168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,float16,0,1.8327840169270833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,float16,0,2.178933302561442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,fp8,0,1.8308533032735188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,128,1,fp8,fp8,0,1.700757344563802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,fp8,0,2.1765759785970054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,0,1,fp8,fp8,0,1.9986507097880046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,float16,0,1.8420480092366536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,float16,0,2.184607982635498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,fp8,0,1.8381013870239258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,fp8,0,2.1821066538492837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,0,1,fp8,fp8,0,2.0338667233784995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,float16,0,1.9104372660319011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,float16,0,2.2319893836975098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,fp8,0,1.8508960405985515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,128,1,fp8,fp8,0,1.807029406229655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,float16,0,1.0193119843800862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,0,1,fp8,fp8,0,2.109861373901367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,float16,0,1.2025813261667888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,fp8,0,0.9962240060170492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,128,1,fp8,fp8,0,0.9701120058695475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,fp8,0,2.212000052134196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,fp8,0,1.1805333296457927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,128,1,fp8,fp8,0,1.727882703145345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,float16,0,0.9255466461181641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,fp8,0,0.9232693513234457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,128,1,fp8,fp8,0,0.857909361521403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,fp8,0,1.0925013224283855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,0,1,fp8,fp8,0,1.1299839814503987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,0,1,fp8,fp8,0,1.0128906567891438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,float16,0,1.0994453430175781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,float16,0,0.9308533668518066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,float16,0,1.1061973571777344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,128,1,fp8,fp8,0,0.8673493067423502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,fp8,0,1.1053493022918701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,0,1,fp8,fp8,0,1.020250638326009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,float16,0,0.9388373692830404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,float16,0,1.1142773628234863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,fp8,0,0.9278559684753418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,fp8,0,0.9364799658457438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,128,1,fp8,fp8,0,0.8838826815287272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,fp8,0,1.1078933080037434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,float16,0,0.6264533201853434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,fp8,0,0.5120906829833984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,128,1,fp8,fp8,0,0.4935306708017985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,fp8,0,0.6049386660257975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,0,1,fp8,fp8,0,0.5749386548995972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,float16,0,0.4721173445383708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,float16,0,0.5787680149078369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,fp8,0,0.472213347752889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,128,1,fp8,fp8,0,0.43901864687601727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,fp8,0,0.560698668162028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,0,1,fp8,fp8,0,0.5182026624679565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,float16,0,0.4764853318532308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,float16,0,0.5668479998906454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,fp8,0,0.4765919844309489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,128,1,fp8,fp8,0,0.4613279898961385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,fp8,0,0.5656799872716268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,0,1,fp8,fp8,0,1.0481173197428386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,0,1,fp8,fp8,0,0.523909330368042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,float16,0,0.47890667120615643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,float16,0,0.5680319865544637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,128,1,fp8,fp8,0,0.4474293390909831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,fp8,0,0.5678773323694865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,0,1,fp8,fp8,0,0.52729598681132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,float16,0,0.27024000883102417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,float16,0,0.31865066289901733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,fp8,0,0.26393600304921466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,float16,0,0.5221866766611735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,128,1,fp8,fp8,0,0.25780266523361206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,fp8,0,0.31270400683085126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,0,1,fp8,fp8,0,0.30139732360839844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,float16,0,0.2924639979998271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,fp8,0,0.2453226645787557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,128,1,fp8,fp8,0,0.2308746576309204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,fp8,0,0.29183467229207355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,0,1,fp8,fp8,0,0.2719573378562927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,float16,0,0.24768000841140747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,float16,0,0.2945706645647685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,128,1,fp8,fp8,0,0.2350026567776998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,fp8,0,0.2939466635386149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,fp8,0,0.477946678797404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,float16,0,0.24889065821965536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,float16,0,0.2956053415934245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,fp8,0,0.24968532721201578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,128,1,fp8,fp8,0,0.2344800035158793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,fp8,0,0.2974666754404704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,0,1,fp8,fp8,0,0.2776479919751485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,float16,0,0.14616533120473227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,float16,0,0.1720693310101827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,fp8,0,0.1444000005722046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,fp8,0,0.24791999657948813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,fp8,0,0.17161067326863608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,0,1,fp8,fp8,0,0.16660799582799277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,float16,0,0.24634667237599692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,float16,0,0.13134400049845377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,float16,0,0.15582399566968283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,fp8,0,0.12989866733551025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,128,1,fp8,fp8,0,0.1239413321018219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,fp8,0,0.15654399991035461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,0,1,fp8,fp8,0,0.1458080013593038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,float16,0,0.13201600313186646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,float16,0,0.15717867016792297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,fp8,0,0.13226667046546936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,128,1,fp8,fp8,0,0.1265600025653839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,fp8,0,0.15686399737993875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,0,1,fp8,fp8,0,0.14845866958300272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,float16,0,0.13366400202115378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,float16,0,0.15988266468048096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,fp8,0,0.13396267096201578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,128,1,fp8,fp8,0,0.12803199887275696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,fp8,0,0.15893333156903586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,0,1,fp8,fp8,0,0.15014400084813437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,float16,0,0.08103999992211659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,float16,0,0.09502933422724406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,fp8,0,0.08092266817887624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,128,1,fp8,fp8,0,0.08063999811808269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,fp8,0,0.09577600161234538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,0,1,fp8,fp8,0,0.09517866373062134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,float16,0,0.0747573326031367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,128,1,fp8,fp8,0,0.14378666877746582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,fp8,0,0.0747573326031367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,128,1,fp8,fp8,0,0.06937066713968913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,fp8,0,0.08914666374524434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,0,1,fp8,fp8,0,0.08238933483759563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,float16,0,0.0749120016892751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,float16,0,0.0888159970442454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,fp8,0,0.0749066670735677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,0,1,fp8,fp8,0,0.2752799987792969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,128,1,fp8,fp8,0,0.07051200171311696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,fp8,0,0.0890826682249705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,0,1,fp8,fp8,0,0.08269333342711131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,float16,0,0.07483200232187907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,float16,0,0.08989866574605306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,fp8,0,0.0755573312441508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,128,1,fp8,fp8,0,0.07063999772071838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,fp8,0,0.0905013382434845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,0,1,fp8,fp8,0,0.08306133250395457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,float16,0,0.04850133260091146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,float16,0,0.05641066531340281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,fp8,0,0.04821866750717163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,128,1,fp8,fp8,0,0.0466186652580897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,fp8,0,0.056101332108179726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,0,1,fp8,fp8,0,0.05572799841562907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,float16,0,0.04780800143877665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,float16,0,0.05406400064627329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,fp8,0,0.047744000951449074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,128,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,fp8,0,0.05409066875775655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,0,1,fp8,fp8,0,0.051818668842315674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,float16,0,0.04656533400217692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,float16,0,0.0553653339544932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,fp8,0,0.04713066418965658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,128,1,fp8,fp8,0,0.045941332976023354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,fp8,0,0.05395199855168661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,0,1,fp8,fp8,0,0.052058666944503784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,float16,0,0.04773333172003428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,float16,0,0.05770133435726166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,128,1,fp8,fp8,0,0.045034666856129967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,fp8,0,0.05624533196290334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,0,1,fp8,fp8,0,0.05206400156021118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,float16,0,0.04093866546948751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,fp8,0,0.034202667574087776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,fp8,0,0.040287998815377556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,0,1,fp8,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,float16,0,0.09074667096138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,float16,0,0.034458667039871216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,float16,0,0.0402453343073527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,fp8,0,0.03389866650104523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,128,1,fp8,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,fp8,0,0.039919999738534294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,0,1,fp8,fp8,0,0.03610666592915853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,float16,0,0.033546666304270424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,float16,0,0.0339626669883728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,128,1,fp8,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,fp8,0,0.03985599925120672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,0,1,fp8,fp8,0,0.03775999943415324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,float16,0,0.03367999941110611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,float16,0,0.039834665755430855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,fp8,0,0.033999999364217125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,128,1,fp8,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,float16,0,0.03979199876387914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,fp8,0,0.03385599950949351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,128,1,fp8,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,128,1,float16,float16,0,1.77510404586792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,0,1,float16,float16,0,1.8018293380737305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,fp8,0,0.041477332512537636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,0,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,128,1,float16,fp8,0,1.7734452883402507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,128,1,fp8,fp8,0,1.6544373830159504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,0,1,float16,fp8,0,1.7990880012512207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,0,1,fp8,fp8,0,1.6752692858378093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,128,1,float16,float16,0,1.7822826703389485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,0,1,float16,float16,0,1.8111626307169597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,128,1,float16,fp8,0,1.7773547172546387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,0,1,float16,fp8,0,1.8077279726664226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,128,1,float16,float16,0,1.8580586115519206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,128,1,fp8,fp8,0,1.6757920583089192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,0,1,float16,float16,0,1.900858720143636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,128,1,float16,fp8,0,1.796885331471761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,0,1,fp8,fp8,0,1.707749366760254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,128,1,fp8,fp8,0,1.751237392425537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,0,1,float16,fp8,0,1.8830240567525227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,0,1,fp8,fp8,0,1.7720106442769368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,0,1,float16,float16,0,1.0096159776051838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,128,1,float16,fp8,0,0.9706559975941976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,128,1,fp8,fp8,0,0.9455306529998779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,0,1,float16,fp8,0,0.9884320100148519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,0,1,fp8,fp8,0,0.9646773338317871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,128,1,float16,float16,0,0.8972586790720621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,128,1,float16,float16,0,0.9909493128458658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,128,1,float16,fp8,0,0.8959786891937256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,128,1,fp8,fp8,0,0.8319733142852783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,0,1,float16,fp8,0,0.908250649770101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,0,1,fp8,fp8,0,0.840826670328776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,128,1,float16,float16,0,0.9013120333353678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,0,1,float16,float16,0,0.9149066607157389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,128,1,float16,fp8,0,0.8985866705576578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,0,1,float16,float16,0,0.9116480350494385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,0,1,float16,fp8,0,0.9133386611938477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,0,1,fp8,fp8,0,0.8473546504974365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,128,1,float16,float16,0,0.9064586957295736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,0,1,float16,float16,0,0.9253919919331869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,128,1,float16,fp8,0,0.9132586320241293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,128,1,fp8,fp8,0,0.8589226404825846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,0,1,float16,fp8,0,0.9222026666005453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,128,1,float16,float16,0,0.5189493497212728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,0,1,float16,float16,0,0.5176106691360474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,128,1,float16,fp8,0,0.5067466497421265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,128,1,fp8,fp8,0,0.4813386599222819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,128,1,fp8,fp8,0,0.850869337717692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,0,1,fp8,fp8,0,0.4892053206761678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,128,1,float16,float16,0,0.458133339881897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,0,1,float16,float16,0,0.46456531683603924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,0,1,fp8,fp8,0,0.873194694519043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,128,1,fp8,fp8,0,0.42670400937398273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,0,1,fp8,fp8,0,0.42853331565856934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,128,1,float16,float16,0,0.4590080181757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,0,1,float16,float16,0,0.4678719838460286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,128,1,float16,fp8,0,0.458026647567749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,128,1,float16,fp8,0,0.4604640007019043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,128,1,fp8,fp8,0,0.4288746515909831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,0,1,float16,fp8,0,0.4678613344828288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,0,1,fp8,fp8,0,0.43587199846903485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,128,1,float16,float16,0,0.4635466734568278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,0,1,float16,float16,0,0.4708640178044637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,128,1,float16,fp8,0,0.46326935291290283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,128,1,fp8,fp8,0,0.4348213275273641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,0,1,float16,fp8,0,0.5076586802800497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,0,1,float16,fp8,0,0.4708213408788045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,0,1,fp8,fp8,0,0.4582293430964152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,0,1,float16,fp8,0,0.46486401557922363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,128,1,float16,fp8,0,0.25785066684087116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,128,1,fp8,fp8,0,0.2516426642735799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,0,1,float16,fp8,0,0.26392533381779987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,0,1,fp8,fp8,0,0.2548000017801921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,128,1,float16,float16,0,0.23891200621922812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,0,1,float16,float16,0,0.24143467346827188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,128,1,float16,fp8,0,0.2382133404413859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,128,1,fp8,fp8,0,0.2235520084698995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,0,1,float16,fp8,0,0.24166399240493774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,0,1,fp8,fp8,0,0.2261013388633728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,128,1,float16,float16,0,0.24062933524449667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,0,1,float16,float16,0,0.24434133370717367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,0,1,float16,float16,0,0.267194668451945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,128,1,float16,fp8,0,0.24099733432133993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,128,1,fp8,fp8,0,0.22758400440216064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,0,1,float16,fp8,0,0.2440053423245748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,0,1,fp8,fp8,0,0.22883200645446777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,128,1,float16,float16,0,0.24258132775624594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,0,1,float16,float16,0,0.2468106746673584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,128,1,float16,fp8,0,0.24169067541758218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,0,1,float16,fp8,0,0.2467306653658549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,0,1,fp8,fp8,0,0.23038933674494425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,128,1,float16,float16,0,0.142277330160141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,0,1,float16,float16,0,0.14416000247001648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,128,1,float16,fp8,0,0.1402346690495809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,128,1,fp8,fp8,0,0.14019200205802917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,0,1,float16,fp8,0,0.1426400045553843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,0,1,fp8,fp8,0,0.1421119968096415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,128,1,float16,float16,0,0.12618666887283325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,0,1,float16,float16,0,0.1297706663608551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,128,1,float16,fp8,0,0.12743999560674033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,128,1,fp8,fp8,0,0.12114133437474568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,0,1,float16,fp8,0,0.12921067078908285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,0,1,fp8,fp8,0,0.12167466680208842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,128,1,float16,float16,0,0.12796266873677573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,0,1,float16,float16,0,0.13008532921473184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,128,1,float16,fp8,0,0.12796800335248312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,128,1,fp8,fp8,0,0.12192533413569133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,0,1,float16,fp8,0,0.12897599736849466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,0,1,fp8,fp8,0,0.12406933307647705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,128,1,float16,float16,0,0.1284213364124298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,0,1,float16,float16,0,0.13210133711496988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,128,1,float16,fp8,0,0.12923199931780496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,128,1,fp8,fp8,0,0.12507733702659607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,0,1,float16,fp8,0,0.13153066237767538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,0,1,fp8,fp8,0,0.12578133742014566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,128,1,float16,float16,0,0.07850666840871175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,0,1,float16,float16,0,0.08135466774304707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,128,1,float16,fp8,0,0.07684266567230225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,128,1,fp8,fp8,0,0.08050133287906647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,0,1,float16,fp8,0,0.0767146646976471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,0,1,fp8,fp8,0,0.08054399987061818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,128,1,float16,float16,0,0.0718453327814738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,0,1,float16,float16,0,0.07390399773915608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,128,1,float16,fp8,0,0.07259200016657512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,128,1,fp8,fp8,0,0.0683786670366923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,0,1,float16,fp8,0,0.07300266623497009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,0,1,fp8,fp8,0,0.0682826687892278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,128,1,float16,float16,0,0.2622879942258199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,0,1,float16,float16,0,0.07262933254241943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,128,1,float16,fp8,0,0.07284266750017802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,128,1,fp8,fp8,0,0.06764266888300578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,0,1,float16,fp8,0,0.07247466842333476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,0,1,fp8,fp8,0,0.06842666864395142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,128,1,float16,float16,0,0.07266133526961009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,0,1,float16,float16,0,0.07458666463692983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,128,1,float16,fp8,0,0.07281066477298737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,128,1,fp8,fp8,0,0.2280906637509664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,0,1,float16,fp8,0,0.07426666716734569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,0,1,fp8,fp8,0,0.06785599887371063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,128,1,float16,float16,0,0.047877331574757896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,0,1,float16,float16,0,0.04750399788220724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,128,1,float16,fp8,0,0.04771199822425842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,128,1,fp8,fp8,0,0.04576533536116282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,0,1,float16,fp8,0,0.04660266637802124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,128,1,float16,float16,0,0.07257600128650665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,128,1,float16,float16,0,0.04562133550643921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,0,1,float16,float16,0,0.04505600035190582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,128,1,float16,fp8,0,0.04576533536116282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,128,1,fp8,fp8,0,0.04177600145339966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,128,1,fp8,fp8,0,0.06965866684913635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,0,1,fp8,fp8,0,0.04365866879622141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,128,1,float16,float16,0,0.0459199994802475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,0,1,float16,float16,0,0.04570133487383524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,128,1,float16,fp8,0,0.04582933088143667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,128,1,fp8,fp8,0,0.043807998299598694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,0,1,float16,fp8,0,0.0462719996770223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,0,1,fp8,fp8,0,0.04284800092379252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,128,1,float16,float16,0,0.04598933458328247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,0,1,float16,float16,0,0.04587733248869578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,128,1,float16,fp8,0,0.04600533346335093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,128,1,fp8,fp8,0,0.04370133578777313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,0,1,float16,fp8,0,0.04800533254941305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,0,1,fp8,fp8,0,0.04330666859944662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,0,1,float16,float16,0,0.03472000112136205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,128,1,float16,fp8,0,0.035391998787721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,128,1,fp8,fp8,0,0.03346666693687439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,0,1,float16,fp8,0,0.035743998984495796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,0,1,fp8,fp8,0,0.033887999753157295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,128,1,float16,float16,0,0.03367999941110611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,0,1,float16,float16,0,0.033813332517941795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,128,1,float16,fp8,0,0.03388266762097677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,128,1,fp8,fp8,0,0.032469332218170166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,128,1,float16,float16,0,0.03602666656176249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,0,1,float16,fp8,0,0.033674667278925575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,0,1,fp8,fp8,0,0.031727999448776245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,128,1,float16,float16,0,0.033930666744709015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,128,1,float16,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,128,1,fp8,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,0,1,float16,fp8,0,0.03378133227427801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,0,1,fp8,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,128,1,float16,float16,0,0.03370666752258936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,0,1,float16,float16,0,0.03332266708215078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,128,1,float16,fp8,0,0.03405333310365677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,128,1,fp8,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,0,1,float16,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,0,1,fp8,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,128,1,float16,float16,0,0.025594666600227356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,0,1,float16,float16,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,128,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,128,1,fp8,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,0,1,fp8,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,128,1,float16,float16,0,0.02518933266401291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,0,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,128,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,128,1,fp8,fp8,0,0.02350933353106181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,0,1,float16,fp8,0,0.025733334322770435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,0,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,128,1,float16,float16,0,0.025413334369659424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,0,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,128,1,float16,fp8,0,0.025040000677108765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,128,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,0,1,float16,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,0,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,128,1,float16,float16,0,0.025370667378107708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,0,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,128,1,float16,fp8,0,0.02553066611289978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,128,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,0,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,128,1,float16,float16,0,0.8236213525136312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,0,1,float16,float16,0,0.8099839687347412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,0,1,float16,fp8,0,0.045893331368764244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,128,1,float16,fp8,0,0.8215733369191488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,0,1,float16,fp8,0,0.807423988978068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,0,1,fp8,fp8,0,0.044106667240460716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,0,1,fp8,fp8,0,0.7323946952819824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,128,1,float16,float16,0,0.8282240231831869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,0,1,float16,float16,0,0.8144426345825195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,128,1,float16,fp8,0,0.8272106647491455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,128,1,fp8,fp8,0,0.763434648513794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,128,1,fp8,fp8,0,0.7507733503977457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,0,1,fp8,fp8,0,0.7424853642781576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,128,1,float16,float16,0,0.8375306924184164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,0,1,float16,float16,0,0.8226666450500488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,128,1,float16,fp8,0,0.8322827021280924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,128,1,fp8,fp8,0,0.7995200157165527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,0,1,float16,fp8,0,0.818506638209025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,0,1,fp8,fp8,0,0.7831306457519531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,128,1,float16,float16,0,0.47041066487630206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,0,1,float16,float16,0,0.464191993077596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,128,1,float16,fp8,0,0.4604906638463338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,128,1,fp8,fp8,0,0.44618133703867596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,0,1,float16,fp8,0,0.4527253309885661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,0,1,fp8,fp8,0,0.4515999952952067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,128,1,float16,float16,0,0.41759467124938965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,0,1,float16,float16,0,0.4105813503265381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,128,1,float16,fp8,0,0.4171946843465169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,128,1,fp8,fp8,0,0.3869866530100505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,0,1,float16,fp8,0,0.40985600153605145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,0,1,fp8,fp8,0,0.37674132982889813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,128,1,float16,float16,0,0.4217280149459839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,0,1,float16,fp8,0,0.8120960394541422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,128,1,float16,fp8,0,0.42060800393422443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,128,1,fp8,fp8,0,0.39029332002003986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,0,1,float16,fp8,0,0.41285332043965656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,0,1,fp8,fp8,0,0.3810880184173584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,128,1,float16,float16,0,0.44277334213256836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,0,1,float16,float16,0,0.4172746737798055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,128,1,float16,fp8,0,0.42316265900929767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,128,1,fp8,fp8,0,0.39343468348185223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,0,1,float16,float16,0,0.41255998611450195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,0,1,float16,fp8,0,0.4176853497823079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,0,1,fp8,fp8,0,0.38682134946187335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,0,1,float16,float16,0,0.23909332354863486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,128,1,float16,fp8,0,0.23797865708669028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,128,1,fp8,fp8,0,0.2339893380800883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,0,1,float16,fp8,0,0.23363200823465982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,0,1,fp8,fp8,0,0.24007999897003174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,128,1,float16,float16,0,0.21641600131988525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,0,1,float16,float16,0,0.21331733465194702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,128,1,float16,fp8,0,0.2169119914372762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,128,1,fp8,fp8,0,0.20238399505615234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,0,1,float16,fp8,0,0.21344532569249472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,0,1,fp8,fp8,0,0.1964799960454305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,0,1,float16,float16,0,0.2156053384145101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,128,1,float16,float16,0,0.21864000956217447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,128,1,fp8,fp8,0,0.20678933461507162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,0,1,float16,fp8,0,0.2150826652844747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,0,1,fp8,fp8,0,0.20106667280197144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,128,1,float16,float16,0,0.22245333592096964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,0,1,float16,float16,0,0.2169333299001058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,128,1,float16,fp8,0,0.22219733397165933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,128,1,fp8,fp8,0,0.20771199464797974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,0,1,float16,fp8,0,0.21825599670410156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,128,1,float16,float16,0,0.13274133205413818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,128,1,float16,float16,0,0.24244266748428345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,128,1,float16,fp8,0,0.1301706631978353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,128,1,fp8,fp8,0,0.13121599952379862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,0,1,float16,fp8,0,0.1291039983431498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,0,1,fp8,fp8,0,0.1279253363609314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,128,1,float16,float16,0,0.11779200037320454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,0,1,fp8,fp8,0,0.20220265785853067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,128,1,float16,fp8,0,0.11839466293652852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,0,1,float16,float16,0,0.1306719978650411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,0,1,float16,fp8,0,0.11598933736483256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,0,1,fp8,fp8,0,0.10598933696746826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,128,1,float16,float16,0,0.11892267068227132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,0,1,float16,float16,0,0.11751466989517212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,128,1,float16,fp8,0,0.11957333485285442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,0,1,float16,float16,0,0.11563199758529663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,0,1,float16,fp8,0,0.11698133746782939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,128,1,fp8,fp8,0,0.10980799794197083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,128,1,float16,float16,0,0.12079999844233195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,0,1,float16,float16,0,0.11783466736475627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,128,1,float16,fp8,0,0.11987732847531636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,128,1,fp8,fp8,0,0.11372266213099162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,0,1,float16,fp8,0,0.11845866839090984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,0,1,fp8,fp8,0,0.11210133632024129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,128,1,float16,float16,0,0.07362133264541626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,0,1,float16,float16,0,0.0727893312772115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,0,1,fp8,fp8,0,0.10930666327476501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,128,1,float16,fp8,0,0.07267199953397115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,128,1,fp8,fp8,0,0.0764213353395462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,0,1,float16,fp8,0,0.07228266696135204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,0,1,fp8,fp8,0,0.07340799768765767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,128,1,float16,float16,0,0.0684853345155716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,0,1,float16,float16,0,0.06628266473611195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,128,1,float16,fp8,0,0.06855466465155284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,128,1,fp8,fp8,0,0.06230400005976359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,0,1,float16,fp8,0,0.06776533524195354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,0,1,fp8,fp8,0,0.06206933160622915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,128,1,float16,float16,0,0.06972266733646393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,0,1,float16,float16,0,0.06796800096829732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,128,1,float16,fp8,0,0.06826133529345195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,128,1,fp8,fp8,0,0.0642133355140686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,0,1,float16,fp8,0,0.06665066878000896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,0,1,fp8,fp8,0,0.062261333068211876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,128,1,float16,float16,0,0.06858133276303609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,0,1,float16,float16,0,0.06760533154010773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,128,1,float16,fp8,0,0.06858666737874348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,128,1,fp8,fp8,0,0.06465599934260051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,0,1,float16,fp8,0,0.06810666620731354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,0,1,fp8,fp8,0,0.06252799928188324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,128,1,float16,float16,0,0.045509333411852516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,0,1,float16,float16,0,0.04497600098450979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,128,1,float16,fp8,0,0.04403733213742574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,128,1,fp8,fp8,0,0.04388799766699473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,0,1,float16,fp8,0,0.04379733403523763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,0,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,128,1,float16,float16,0,0.043578664461771645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,0,1,float16,float16,0,0.04192000130812327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,128,1,float16,fp8,0,0.04357333481311798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,128,1,fp8,fp8,0,0.041690667470296226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,0,1,float16,fp8,0,0.04373333354791006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,0,1,fp8,fp8,0,0.03959999978542328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,128,1,float16,float16,0,0.04401599864164988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,128,1,float16,fp8,0,0.21926399072011313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,128,1,float16,fp8,0,0.04381866753101349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,128,1,fp8,fp8,0,0.04142933338880539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,0,1,float16,fp8,0,0.043151999513308205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,0,1,fp8,fp8,0,0.03982933362325033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,128,1,float16,float16,0,0.04384533564249674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,0,1,float16,float16,0,0.042954668402671814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,128,1,float16,fp8,0,0.043791999419530235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,128,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,0,1,float16,fp8,0,0.04382933179537455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,128,1,fp8,fp8,0,0.11152000228563945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,0,1,fp8,fp8,0,0.041477332512537636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,128,1,float16,float16,0,0.03155199935038885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,128,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,128,1,fp8,fp8,0,0.0315733328461647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,0,1,float16,fp8,0,0.032399999598662056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,0,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,128,1,float16,float16,0,0.03141866624355316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,0,1,float16,float16,0,0.029733332494894665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,128,1,float16,fp8,0,0.031301334500312805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,128,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,0,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,0,1,float16,float16,0,0.03153600047032038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,0,1,float16,float16,0,0.041877334316571556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,0,1,float16,float16,0,0.029557332396507263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,128,1,float16,fp8,0,0.031658666829268135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,128,1,fp8,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,0,1,float16,fp8,0,0.03235200047492981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,128,1,float16,float16,0,0.0317493329445521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,0,1,float16,float16,0,0.031167998909950256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,128,1,fp8,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,0,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,128,1,float16,float16,0,0.03160533308982849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,128,1,float16,float16,0,0.023823998868465424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,0,1,float16,float16,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,128,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,128,1,fp8,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,0,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,0,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,128,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,0,1,float16,fp8,0,0.03173333406448364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,0,1,fp8,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,0,1,fp8,fp8,0,0.022634667654832203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,128,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,128,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,128,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,0,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,0,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,128,1,float16,float16,0,0.023002666731675465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,0,1,float16,float16,0,0.022015998760859173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,128,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,0,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,0,1,fp8,fp8,0,0.02290133386850357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,128,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,0,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,128,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,0,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,0,1,fp8,fp8,0,0.019551999866962433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,128,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,0,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,128,1,float16,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,128,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,128,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,0,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,128,1,float16,float16,0,0.01942933350801468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,128,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,128,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,128,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,128,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,128,1,float16,float16,0,0.44790399074554443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,0,1,float16,float16,0,0.4472106695175171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,128,1,float16,fp8,0,0.4475626548131307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,0,1,fp8,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,0,1,float16,fp8,0,0.4445546468098958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,0,1,fp8,fp8,0,0.40858133633931476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,128,1,float16,float16,0,0.451909343401591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,0,1,float16,float16,0,0.45126934846242267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,128,1,float16,fp8,0,0.450272003809611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,128,1,fp8,fp8,0,0.4179893334706624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,0,1,float16,fp8,0,0.4499093294143677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,0,1,fp8,fp8,0,0.41832534472147626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,128,1,float16,float16,0,0.4562079906463623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,0,1,float16,float16,0,0.4551733334859212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,128,1,float16,fp8,0,0.45313600699106854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,128,1,fp8,fp8,0,0.42315733432769775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,0,1,float16,fp8,0,0.4535733461380005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,0,1,fp8,fp8,0,0.42421332995096844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,128,1,float16,float16,0,0.2568053404490153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,0,1,float16,float16,0,0.25733866294225055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,128,1,float16,fp8,0,0.2531733314196269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,128,1,fp8,fp8,0,0.24465066194534302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,0,1,float16,fp8,0,0.2518399953842163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,0,1,fp8,fp8,0,0.24478934208552042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,128,1,float16,float16,0,0.2307786742846171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,0,1,float16,float16,0,0.23243200778961182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,128,1,float16,fp8,0,0.2320693333943685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,128,1,fp8,fp8,0,0.2139306664466858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,0,1,float16,fp8,0,0.23099732398986816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,0,1,fp8,fp8,0,0.2130720019340515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,128,1,float16,float16,0,0.23482133944829306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,0,1,float16,float16,0,0.2332693338394165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,128,1,float16,fp8,0,0.2340959906578064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,128,1,fp8,fp8,0,0.2214933236440023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,0,1,float16,fp8,0,0.23295466105143228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,0,1,fp8,fp8,0,0.21994666258494058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,128,1,fp8,fp8,0,0.4098186492919922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,0,1,float16,float16,0,0.2371573249499003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,128,1,float16,fp8,0,0.23612799247105917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,128,1,fp8,fp8,0,0.22186134258906046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,0,1,float16,fp8,0,0.23521600166956583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,0,1,fp8,fp8,0,0.22009599208831787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,128,1,float16,float16,0,0.13788800438245138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,128,1,float16,fp8,0,0.13562132914861044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,128,1,fp8,fp8,0,0.1339359978834788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,0,1,float16,fp8,0,0.1350933313369751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,0,1,fp8,fp8,0,0.13402666648228964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,128,1,float16,float16,0,0.1234879990418752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,0,1,float16,float16,0,0.12378666798273723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,128,1,float16,fp8,0,0.12450666228930156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,128,1,fp8,fp8,0,0.11385066310564677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,0,1,float16,fp8,0,0.1239413321018219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,0,1,fp8,fp8,0,0.11430399616559346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,128,1,float16,float16,0,0.12405332922935486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,0,1,float16,float16,0,0.12416533629099528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,128,1,float16,fp8,0,0.1255466639995575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,128,1,fp8,fp8,0,0.11557333668073018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,0,1,float16,fp8,0,0.12372799714406331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,0,1,fp8,fp8,0,0.1153706709543864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,128,1,float16,float16,0,0.12532800436019897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,0,1,float16,float16,0,0.12591999769210815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,128,1,float16,fp8,0,0.12583466370900473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,128,1,fp8,fp8,0,0.11938132842381795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,128,1,float16,float16,0,0.23491734266281128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,0,1,float16,fp8,0,0.1258026659488678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,0,1,fp8,fp8,0,0.11788266897201538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,0,1,float16,float16,0,0.07447466750939687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,128,1,float16,fp8,0,0.07408000032107036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,128,1,fp8,fp8,0,0.07546666761239369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,0,1,float16,fp8,0,0.07372266550858815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,0,1,fp8,fp8,0,0.07633066674073537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,128,1,float16,float16,0,0.06864533325036366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,0,1,float16,float16,0,0.06891199946403503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,128,1,float16,fp8,0,0.07034666836261749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,0,1,float16,float16,0,0.13962133725484213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,0,1,float16,fp8,0,0.07026133437951405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,0,1,fp8,fp8,0,0.0653706689675649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,0,1,float16,float16,0,0.0703413337469101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,128,1,float16,fp8,0,0.06904533505439758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,128,1,float16,float16,0,0.07479466497898102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,128,1,fp8,fp8,0,0.06554133196671803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,0,1,float16,fp8,0,0.06921599805355072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,0,1,fp8,fp8,0,0.06558933357397716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,128,1,float16,float16,0,0.07091733316580455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,0,1,float16,float16,0,0.0702453354994456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,128,1,float16,fp8,0,0.07047466437021892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,128,1,fp8,fp8,0,0.0649599979321162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,0,1,float16,fp8,0,0.07041066884994507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,0,1,fp8,fp8,0,0.06497066716353099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,128,1,float16,float16,0,0.06868266562620799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,128,1,float16,fp8,0,0.04355733096599579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,128,1,fp8,fp8,0,0.043893332282702126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,0,1,float16,fp8,0,0.04444266855716705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,0,1,fp8,fp8,0,0.043893332282702126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,128,1,float16,float16,0,0.043381333351135254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,0,1,float16,float16,0,0.04152533411979675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,128,1,float16,fp8,0,0.04188266893227895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,128,1,fp8,fp8,0,0.0642986645301183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,128,1,float16,float16,0,0.04423999786376953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,0,1,float16,float16,0,0.04426133135954539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,128,1,float16,float16,0,0.04365866879622141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,0,1,float16,float16,0,0.043749332427978516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,128,1,float16,fp8,0,0.043824002146720886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,128,1,fp8,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,0,1,float16,fp8,0,0.04359999795754751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,0,1,fp8,fp8,0,0.040661332507928215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,128,1,float16,float16,0,0.04374399781227112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,128,1,fp8,fp8,0,0.040405333042144775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,0,1,float16,float16,0,0.043791999419530235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,128,1,float16,fp8,0,0.043738668163617454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,128,1,fp8,fp8,0,0.043162668744723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,0,1,float16,fp8,0,0.04349866509437561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,0,1,fp8,fp8,0,0.04185600082079569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,128,1,float16,float16,0,0.030005333324273426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,0,1,float16,float16,0,0.030960001051425934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,128,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,128,1,fp8,fp8,0,0.029711998999118805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,0,1,float16,fp8,0,0.030773334205150604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,0,1,fp8,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,128,1,float16,float16,0,0.029578665892283123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,0,1,float16,float16,0,0.03054400036732356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,128,1,float16,fp8,0,0.029818666477998097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,128,1,fp8,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,0,1,float16,fp8,0,0.029605334003766377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,0,1,float16,fp8,0,0.0436106671889623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,128,1,float16,float16,0,0.030346666773160298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,0,1,float16,float16,0,0.02938133229811986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,128,1,float16,fp8,0,0.030965333183606465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,128,1,fp8,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,0,1,float16,fp8,0,0.029338667790095013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,0,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,128,1,float16,float16,0,0.0295413335164388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,0,1,float16,float16,0,0.03141866624355316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,128,1,float16,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,128,1,fp8,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,0,1,float16,fp8,0,0.02962133288383484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,0,1,fp8,fp8,0,0.027552001178264618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,128,1,float16,float16,0,0.024106666445732117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,0,1,float16,float16,0,0.023573334018389385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,128,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,128,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,0,1,float16,float16,0,0.02319466571013133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,128,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,128,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,0,1,float16,fp8,0,0.023530667026837666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,128,1,float16,float16,0,0.0234400009115537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,128,1,float16,fp8,0,0.02439466615517934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,128,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,0,1,fp8,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,128,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,128,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,0,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,128,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,128,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,0,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,0,1,float16,float16,0,0.017759999881188076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,128,1,float16,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,0,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,0,1,float16,float16,0,0.018021332720915478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,128,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,128,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,0,1,float16,float16,0,0.017456000049908955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,128,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,0,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,128,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,0,1,fp8,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,0,1,float16,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,0,1,float16,float16,0,0.017525333911180496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,128,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,0,1,fp8,fp8,0,0.017759999881188076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,128,1,float16,float16,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,128,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,128,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,128,1,float16,float16,0,0.017509333789348602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,0,1,fp8,fp8,0,0.016597333053747814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,0,1,float16,fp8,0,0.024421334266662598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,128,1,float16,float16,0,0.32206400235493976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,0,1,float16,float16,0,0.3223680059115092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,128,1,float16,fp8,0,0.3217066725095113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,128,1,fp8,fp8,0,0.28793599208196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,0,1,float16,fp8,0,0.3208213249842326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,128,1,float16,float16,0,0.32496533791224164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,0,1,float16,float16,0,0.324672003587087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,128,1,float16,fp8,0,0.3243306676546733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,128,1,fp8,fp8,0,0.29398399591445923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,0,1,float16,fp8,0,0.3242826660474141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,0,1,fp8,fp8,0,0.2951359947522481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,128,1,float16,float16,0,0.32630399862925213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,0,1,fp8,fp8,0,0.2881973385810852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,128,1,float16,fp8,0,0.3264906605084737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,128,1,fp8,fp8,0,0.29598933458328247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,0,1,float16,fp8,0,0.326581339041392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,0,1,fp8,fp8,0,0.29573333263397217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,128,1,float16,float16,0,0.1831093430519104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,0,1,float16,float16,0,0.1835093299547831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,128,1,float16,fp8,0,0.18120533227920532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,128,1,fp8,fp8,0,0.17262399196624756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,0,1,float16,fp8,0,0.1806453267733256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,0,1,fp8,fp8,0,0.17299733559290567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,128,1,float16,float16,0,0.16849599281946817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,0,1,float16,float16,0,0.16931732495625815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,128,1,float16,fp8,0,0.16962667306264242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,128,1,fp8,fp8,0,0.15054933230082193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,0,1,float16,fp8,0,0.16993065675099692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,0,1,fp8,fp8,0,0.1508799990018209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,128,1,float16,float16,0,0.16962132851282755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,0,1,float16,float16,0,0.17058666547139487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,128,1,float16,fp8,0,0.1693920095761617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,128,1,fp8,fp8,0,0.15339733163515726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,0,1,fp8,fp8,0,0.15244799852371216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,0,1,float16,fp8,0,0.17131733894348145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,0,1,float16,float16,0,0.17062934239705405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,128,1,float16,fp8,0,0.17082132895787558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,128,1,fp8,fp8,0,0.15646933515866598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,0,1,float16,fp8,0,0.1707893411318461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,0,1,fp8,fp8,0,0.15677866339683533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,128,1,float16,float16,0,0.09511466821034749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,0,1,float16,float16,0,0.0956053336461385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,128,1,float16,fp8,0,0.09497066338857015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,128,1,fp8,fp8,0,0.09423999985059102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,0,1,float16,fp8,0,0.09506666660308838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,0,1,fp8,fp8,0,0.09500267108281453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,128,1,float16,float16,0,0.0904960036277771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,0,1,float16,float16,0,0.3270240028699239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,128,1,float16,float16,0,0.1700106660525004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,128,1,fp8,fp8,0,0.08118933439254761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,0,1,fp8,fp8,0,0.08089600006739299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,128,1,float16,float16,0,0.0904266635576884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,0,1,float16,float16,0,0.09123733639717102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,128,1,float16,fp8,0,0.09108266234397888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,128,1,fp8,fp8,0,0.0819413314263026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,0,1,float16,fp8,0,0.0904960036277771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,0,1,float16,float16,0,0.09098666906356812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,128,1,float16,fp8,0,0.09149866302808125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,128,1,float16,float16,0,0.09114666779836018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,0,1,float16,float16,0,0.09092266360918681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,128,1,float16,fp8,0,0.09098666906356812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,128,1,fp8,fp8,0,0.08349866668383281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,0,1,float16,fp8,0,0.09089066584904988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,0,1,fp8,fp8,0,0.08509332935015361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,128,1,float16,float16,0,0.05426133175690969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,0,1,float16,float16,0,0.054058666030565895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,128,1,float16,fp8,0,0.05384000142415365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,128,1,fp8,fp8,0,0.05100266635417938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,0,1,float16,fp8,0,0.0539680023988088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,0,1,fp8,fp8,0,0.05183466772238413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,128,1,float16,float16,0,0.05381333331267039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,0,1,float16,float16,0,0.05390933156013489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,128,1,float16,fp8,0,0.05375466744105021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,128,1,fp8,fp8,0,0.04948799808820089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,0,1,float16,fp8,0,0.054144000013669334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,0,1,fp8,fp8,0,0.04808533191680908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,128,1,float16,float16,0,0.05407466491063436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,0,1,float16,float16,0,0.05197333296140035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,128,1,float16,fp8,0,0.0531626691420873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,128,1,fp8,fp8,0,0.04971733192602793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,0,1,float16,fp8,0,0.05273066461086273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,0,1,fp8,fp8,0,0.05036266644795736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,128,1,float16,float16,0,0.053957333167394005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,0,1,float16,float16,0,0.054101333022117615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,128,1,float16,fp8,0,0.05397866666316986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,128,1,fp8,fp8,0,0.05062933266162872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,0,1,float16,fp8,0,0.05382933219273885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,0,1,fp8,fp8,0,0.05036800106366476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,128,1,float16,float16,0,0.03542399903138479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,0,1,float16,float16,0,0.035530666510264076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,0,1,fp8,fp8,0,0.08301333089669545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,128,1,fp8,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,0,1,float16,fp8,0,0.03570133447647095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,0,1,fp8,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,128,1,float16,float16,0,0.035402665535608925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,0,1,float16,float16,0,0.03339199970165888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,128,1,float16,fp8,0,0.035461333890755974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,128,1,fp8,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,0,1,float16,fp8,0,0.03435199956099192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,0,1,fp8,fp8,0,0.03325333446264267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,128,1,float16,float16,0,0.03365866591533025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,0,1,float16,float16,0,0.03365333378314972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,128,1,float16,fp8,0,0.03433600068092346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,128,1,fp8,fp8,0,0.031717332700888314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,0,1,float16,fp8,0,0.034688000877698265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,0,1,fp8,fp8,0,0.033333333830038704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,128,1,float16,float16,0,0.03598399957021078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,0,1,float16,float16,0,0.03561066587766012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,128,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,128,1,fp8,fp8,0,0.03329066683848699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,0,1,float16,fp8,0,0.035802667339642845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,0,1,fp8,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,128,1,float16,float16,0,0.02534399926662445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,0,1,float16,float16,0,0.025498665869235992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,128,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,128,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,0,1,float16,fp8,0,0.025834667185942333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,128,1,float16,float16,0,0.025413334369659424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,0,1,float16,fp8,0,0.08994666735331218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,0,1,float16,float16,0,0.025792000194390614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,128,1,float16,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,0,1,fp8,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,128,1,float16,float16,0,0.026799999177455902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,0,1,float16,float16,0,0.02703999976317088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,128,1,float16,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,128,1,fp8,fp8,0,0.02430933217207591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,0,1,float16,fp8,0,0.02571733295917511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,128,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,128,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,0,1,float16,float16,0,0.025237334271272022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,128,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,0,1,float16,fp8,0,0.02604266752799352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,128,1,float16,float16,0,0.02033599962790807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,128,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,0,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,128,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,128,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,128,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,128,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,128,1,fp8,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,128,1,float16,float16,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,128,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,0,1,float16,float16,0,0.021359999974568684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,128,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,128,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,128,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,0,1,fp8,fp8,0,0.025786665578683216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,128,1,float16,fp8,0,0.017738666385412216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,128,1,float16,float16,0,0.017664000391960144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,128,1,float16,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,128,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,0,1,fp8,fp8,0,0.016501333564519882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,128,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,128,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,128,1,fp8,fp8,0,0.016629333297411602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,0,1,fp8,fp8,0,0.0205226664741834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,128,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,0,1,float16,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,0,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,128,1,float16,float16,0,0.2611306707064311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,0,1,float16,float16,0,0.2590506672859192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,128,1,float16,fp8,0,0.2600533366203308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,128,1,fp8,fp8,0,0.23004267613093057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,0,1,float16,fp8,0,0.258949339389801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,0,1,fp8,fp8,0,0.2304853399594625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,128,1,float16,float16,0,0.26049067576726276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,128,1,float16,fp8,0,0.2601919968922933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,128,1,fp8,fp8,0,0.23307732741038004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,0,1,float16,fp8,0,0.2605066696802775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,0,1,fp8,fp8,0,0.23300800720850626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,128,1,float16,float16,0,0.2609279950459798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,0,1,float16,float16,0,0.26155734062194824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,128,1,float16,fp8,0,0.2612266739209493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,128,1,fp8,fp8,0,0.23678400119145712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,0,1,float16,fp8,0,0.26097599665323895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,0,1,float16,float16,0,0.2600160042444865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,128,1,float16,float16,0,0.14080533385276794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,128,1,fp8,fp8,0,0.13404267032941183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,0,1,float16,fp8,0,0.14012266198794046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,0,1,fp8,fp8,0,0.1341600020726522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,128,1,float16,float16,0,0.13616533080736795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,0,1,float16,float16,0,0.13614933689435324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,128,1,float16,fp8,0,0.13434132933616638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,0,1,fp8,fp8,0,0.23647467295328775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,128,1,fp8,fp8,0,0.12171199917793274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,0,1,float16,fp8,0,0.13471466302871704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,0,1,fp8,fp8,0,0.12213333447774251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,128,1,float16,float16,0,0.1349120040734609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,0,1,float16,float16,0,0.13607999682426453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,128,1,float16,fp8,0,0.13615467151006064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,128,1,fp8,fp8,0,0.12296533584594727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,0,1,float16,fp8,0,0.13478933771451315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,0,1,fp8,fp8,0,0.12329066793123881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,0,1,float16,float16,0,0.1405226687590281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,128,1,float16,float16,0,0.13610133528709412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,0,1,float16,float16,0,0.13636799653371176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,128,1,fp8,fp8,0,0.12407466769218445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,0,1,float16,fp8,0,0.1356053352355957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,0,1,fp8,fp8,0,0.12321066856384277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,0,1,float16,float16,0,0.0766293356815974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,128,1,float16,fp8,0,0.0746559997399648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,128,1,fp8,fp8,0,0.07073600093523662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,0,1,float16,fp8,0,0.07650133470694225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,128,1,float16,fp8,0,0.14054399728775024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,128,1,float16,float16,0,0.07522666454315186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,0,1,float16,float16,0,0.07478400071461995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,128,1,float16,float16,0,0.07467733323574066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,128,1,fp8,fp8,0,0.06846400101979573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,0,1,float16,fp8,0,0.07470933099587758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,0,1,fp8,fp8,0,0.06849599877993266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,128,1,float16,float16,0,0.07436266541481018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,0,1,fp8,fp8,0,0.072202667593956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,128,1,float16,fp8,0,0.0743093341588974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,128,1,fp8,fp8,0,0.06860800087451935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,0,1,float16,fp8,0,0.07445866862932841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,128,1,float16,fp8,0,0.07524266839027405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,0,1,fp8,fp8,0,0.06850133339564006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,128,1,float16,float16,0,0.07659733295440674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,0,1,float16,float16,0,0.07469866673151652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,128,1,float16,fp8,0,0.07467199862003326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,128,1,fp8,fp8,0,0.07061866422494252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,0,1,float16,fp8,0,0.07459199925263722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,0,1,fp8,fp8,0,0.07062399884064992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,128,1,float16,float16,0,0.046442667643229164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,0,1,float16,float16,0,0.046336000164349876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,128,1,float16,fp8,0,0.04601066807905833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,128,1,fp8,fp8,0,0.043706665436426796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,0,1,float16,fp8,0,0.04649066428343455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,0,1,fp8,fp8,0,0.04358399907747904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,128,1,float16,float16,0,0.04606399933497111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,0,1,float16,float16,0,0.04372266431649526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,128,1,float16,fp8,0,0.046256000796953835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,128,1,fp8,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,0,1,float16,fp8,0,0.04486933350563049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,0,1,fp8,fp8,0,0.043375998735427856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,128,1,float16,float16,0,0.045706664522488914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,0,1,float16,float16,0,0.04557866851488749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,128,1,float16,fp8,0,0.046495998899141945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,128,1,fp8,fp8,0,0.041802664597829185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,0,1,float16,fp8,0,0.04567466676235199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,0,1,fp8,fp8,0,0.04197866717974345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,128,1,float16,float16,0,0.04599999884764353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,0,1,float16,float16,0,0.045552000403404236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,128,1,float16,fp8,0,0.045653333266576133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,128,1,fp8,fp8,0,0.043706665436426796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,0,1,float16,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,0,1,fp8,fp8,0,0.04393066465854645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,128,1,float16,float16,0,0.031290667752424874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,0,1,float16,float16,0,0.03158933420976003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,128,1,float16,fp8,0,0.031258667508761086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,128,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,0,1,float16,fp8,0,0.031701333820819855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,128,1,float16,float16,0,0.031301334500312805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,0,1,float16,float16,0,0.029461334149042766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,128,1,float16,fp8,0,0.031290667752424874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,128,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,0,1,float16,fp8,0,0.03163733333349228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,0,1,fp8,fp8,0,0.029167999823888142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,128,1,float16,float16,0,0.02974933385848999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,0,1,float16,float16,0,0.07489599784215291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,0,1,float16,float16,0,0.03053866575161616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,128,1,float16,fp8,0,0.02992533395687739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,128,1,fp8,fp8,0,0.029605334003766377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,0,1,float16,fp8,0,0.03050133337577184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,0,1,fp8,fp8,0,0.029845332105954487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,128,1,float16,float16,0,0.03130666663249334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,0,1,float16,float16,0,0.031040000418821972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,128,1,float16,fp8,0,0.031125334401925404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,128,1,fp8,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,0,1,float16,fp8,0,0.03062933435042699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,0,1,fp8,fp8,0,0.02914133419593175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,128,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,128,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,128,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,0,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,128,1,float16,float16,0,0.023311999936898548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,0,1,float16,float16,0,0.023386667172114056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,128,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,128,1,float16,fp8,0,0.1367093324661255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,0,1,fp8,fp8,0,0.02362666775782903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,128,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,128,1,float16,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,128,1,fp8,fp8,0,0.022970666488011677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,0,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,0,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,128,1,float16,float16,0,0.022805333137512207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,0,1,float16,float16,0,0.02317333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,128,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,128,1,fp8,fp8,0,0.02298133323589961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,128,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,0,1,float16,float16,0,0.01959466685851415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,128,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,0,1,float16,fp8,0,0.019600000232458115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,128,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,0,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,128,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,128,1,fp8,fp8,0,0.018570666511853535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,0,1,float16,fp8,0,0.02067733307679494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,128,1,float16,float16,0,0.01876266673207283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,0,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,128,1,float16,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,128,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,0,1,fp8,fp8,0,0.01950399950146675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,128,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,128,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,128,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,128,1,float16,float16,0,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,128,1,float16,fp8,0,0.01634666696190834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,0,1,fp8,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,128,1,float16,float16,0,0.016517333686351776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,128,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,128,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,128,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,0,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,128,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,128,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,128,1,fp8,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,128,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,0,1,float16,fp8,0,0.015978666643301647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,128,1,float16,float16,0,0.22563733657201132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,0,1,float16,float16,0,0.2244960069656372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,128,1,float16,fp8,0,0.22498132785161337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,128,1,fp8,fp8,0,0.20211732387542725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,0,1,float16,fp8,0,0.22460800409317017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,0,1,fp8,fp8,0,0.20228799184163412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,128,1,float16,float16,0,0.2246506611506144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,0,1,float16,float16,0,0.22524267435073853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,128,1,float16,fp8,0,0.22570133209228516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,128,1,fp8,fp8,0,0.20358934005101523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,0,1,float16,fp8,0,0.22618667284647623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,0,1,fp8,fp8,0,0.20379199584325156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,128,1,float16,float16,0,0.22622400522232056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,0,1,float16,float16,0,0.2258666753768921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,0,1,fp8,fp8,0,0.019744000087181728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,128,1,fp8,fp8,0,0.20333866278330484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,0,1,float16,fp8,0,0.22506133715311685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,0,1,fp8,fp8,0,0.20409067471822104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,0,1,float16,float16,0,0.11970133582750957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,128,1,float16,fp8,0,0.1204266647497813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,128,1,fp8,fp8,0,0.11153067151705424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,0,1,float16,fp8,0,0.12015466888745625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,128,1,float16,float16,0,0.1179253359635671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,0,1,float16,float16,0,0.11796800295511882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,128,1,float16,fp8,0,0.22629332542419434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,128,1,float16,fp8,0,0.1186293363571167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,128,1,fp8,fp8,0,0.10923733313878377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,128,1,float16,float16,0,0.11969600121180217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,0,1,float16,fp8,0,0.11795199910799663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,128,1,float16,float16,0,0.11974400281906128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,0,1,float16,float16,0,0.11793599526087443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,128,1,float16,fp8,0,0.11922666430473328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,128,1,fp8,fp8,0,0.10957333445549011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,0,1,float16,fp8,0,0.11781332890192668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,0,1,fp8,fp8,0,0.10936533411343892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,128,1,float16,float16,0,0.1179146667321523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,0,1,float16,float16,0,0.1195146640141805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,128,1,float16,fp8,0,0.11785067121187846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,128,1,fp8,fp8,0,0.11065600315729777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,0,1,float16,fp8,0,0.11763733625411987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,0,1,fp8,fp8,0,0.10961066683133443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,0,1,fp8,fp8,0,0.10950932900110881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,0,1,float16,float16,0,0.06620799998442332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,128,1,float16,fp8,0,0.06828799843788147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,128,1,fp8,fp8,0,0.06217599908510844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,0,1,float16,fp8,0,0.0684799998998642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,0,1,fp8,fp8,0,0.06234133243560791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,128,1,float16,float16,0,0.06609066824118297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,0,1,float16,float16,0,0.06645866731802623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,128,1,float16,fp8,0,0.06645333270231883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,128,1,fp8,fp8,0,0.06259733438491821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,0,1,float16,fp8,0,0.06669333577156067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,0,1,fp8,fp8,0,0.06044266621271769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,128,1,float16,float16,0,0.06622399886449178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,0,1,float16,float16,0,0.06623999774456024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,128,1,float16,fp8,0,0.06673066814740498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,128,1,float16,float16,0,0.06791999936103821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,0,1,float16,fp8,0,0.06645866731802623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,0,1,fp8,fp8,0,0.06214933097362518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,128,1,float16,float16,0,0.06644266843795776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,0,1,float16,float16,0,0.06648000081380208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,128,1,float16,fp8,0,0.06738133231798808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,0,1,fp8,fp8,0,0.11124799648920695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,0,1,float16,fp8,0,0.06643199920654297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,0,1,fp8,fp8,0,0.062128002444903054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,128,1,float16,float16,0,0.04171200096607208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,0,1,float16,float16,0,0.04321066538492838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,128,1,float16,fp8,0,0.04182399809360504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,128,1,fp8,fp8,0,0.03998400022586187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,0,1,float16,fp8,0,0.041840001940727234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,0,1,fp8,fp8,0,0.0396373321612676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,128,1,float16,float16,0,0.041749333341916404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,0,1,float16,float16,0,0.04165333261092504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,128,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,128,1,fp8,fp8,0,0.03777066618204117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,0,1,float16,fp8,0,0.04141333450873693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,0,1,fp8,fp8,0,0.038015998899936676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,128,1,float16,float16,0,0.04163199911514918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,0,1,float16,float16,0,0.04178133110205332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,128,1,float16,fp8,0,0.042266666889190674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,128,1,fp8,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,0,1,float16,fp8,0,0.04197866717974345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,0,1,fp8,fp8,0,0.03764266769091288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,128,1,float16,float16,0,0.0417546679576238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,0,1,float16,float16,0,0.04010133445262909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,128,1,float16,fp8,0,0.04186666508515676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,128,1,fp8,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,0,1,float16,fp8,0,0.041477332512537636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,0,1,fp8,fp8,0,0.03793066740036011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,128,1,float16,float16,0,0.027749332288901012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,0,1,float16,float16,0,0.029311999678611755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,128,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,128,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,0,1,float16,fp8,0,0.02943466603755951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,0,1,fp8,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,128,1,float16,float16,0,0.029311999678611755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,0,1,float16,float16,0,0.02917333443959554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,128,1,float16,fp8,0,0.027776000400384266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,128,1,fp8,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,128,1,fp8,fp8,0,0.0620959997177124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,0,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,128,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,128,1,fp8,fp8,0,0.062319998939832054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,128,1,fp8,fp8,0,0.025759999950726826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,0,1,float16,fp8,0,0.027802666028340656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,0,1,fp8,fp8,0,0.02611200014750163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,128,1,float16,float16,0,0.028607999285062153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,0,1,float16,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,0,1,float16,float16,0,0.02849599967400233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,128,1,float16,fp8,0,0.02980799973011017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,0,1,float16,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,0,1,fp8,fp8,0,0.027061333258946735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,128,1,float16,float16,0,0.021664001047611237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,128,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,0,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,0,1,fp8,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,128,1,float16,float16,0,0.021717332303524017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,0,1,float16,float16,0,0.021514666577180225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,128,1,float16,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,128,1,fp8,fp8,0,0.020634666085243225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,128,1,fp8,fp8,0,0.027722666660944622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,128,1,float16,float16,0,0.022170667846997578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,128,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,128,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,0,1,float16,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,128,1,float16,float16,0,0.02204799900452296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,0,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,128,1,float16,fp8,0,0.02160000056028366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,128,1,fp8,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,0,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,128,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,0,1,float16,float16,0,0.01942933350801468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,128,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,128,1,float16,float16,0,0.018581333259741466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,128,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,0,1,fp8,fp8,0,0.017952000101407368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,0,1,float16,float16,0,0.01786133274435997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,128,1,float16,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,128,1,fp8,fp8,0,0.01960533360640208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,0,1,float16,fp8,0,0.01971199984351794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,0,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,128,1,float16,float16,0,0.0180479995906353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,0,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,128,1,fp8,fp8,0,0.01836266616980235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,0,1,fp8,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,128,1,float16,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,128,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,128,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,128,1,float16,float16,0,0.01543466622630755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,128,1,fp8,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,128,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,128,1,float16,float16,0,0.019434666881958645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,128,1,float16,float16,0,0.016341333587964375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,0,1,float16,float16,0,0.015562667200962702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,128,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,128,1,fp8,fp8,0,0.016074666132529575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,128,1,float16,float16,0,0.19287999471028647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,128,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,128,1,float16,fp8,0,0.1938986579577128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,128,1,fp8,fp8,0,0.17510400215784708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,0,1,float16,fp8,0,0.19337066014607748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,0,1,fp8,fp8,0,0.17509333292643228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,0,1,float16,fp8,0,0.015882667154073715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,0,1,float16,float16,0,0.19366933902104697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,128,1,float16,fp8,0,0.19329599539438883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,128,1,fp8,fp8,0,0.17508800824483237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,64,0,1,float16,float16,0,0.1923146645228068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,0,1,fp8,fp8,0,0.17505067586898804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,128,1,float16,float16,0,0.19204266866048178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,0,1,float16,float16,0,0.19346133867899576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,128,1,float16,fp8,0,0.1933493415514628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,128,1,fp8,fp8,0,0.17510932683944702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,128,1,float16,float16,0,0.1933599909146627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,0,1,fp8,fp8,0,0.17499732971191406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,128,1,float16,float16,0,0.10358400146166484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,0,0.10316266616185506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,128,1,float16,fp8,0,0.10317867000897725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,128,1,fp8,fp8,0,0.09430399537086487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,0,0.1035146713256836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,64,0,1,fp8,fp8,0,0.0951039989789327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,128,1,float16,float16,0,0.10334933797518413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,0,1,float16,float16,0,0.10340799887975057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,128,1,float16,fp8,0,0.10347200433413188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,128,1,fp8,fp8,0,0.09500267108281453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,0,1,float16,fp8,0,0.10338667035102844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,64,0,1,float16,fp8,0,0.19337066014607748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,128,1,float16,float16,0,0.10347200433413188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,0,1,float16,float16,0,0.10339200496673584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,128,1,float16,fp8,0,0.10368000467618306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,128,1,fp8,fp8,0,0.09507733583450317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,0,1,float16,fp8,0,0.10332799951235454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,64,0,1,fp8,fp8,0,0.09302399555842082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,128,1,float16,float16,0,0.10314133763313293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,0,1,float16,float16,0,0.10318932930628459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,128,1,float16,fp8,0,0.10338667035102844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,128,1,fp8,fp8,0,0.09303999940554301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,0,1,float16,fp8,0,0.10354666908582051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,64,0,1,fp8,fp8,0,0.09301867087682088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,128,1,float16,float16,0,0.05805333455403646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,64,0,1,fp8,fp8,0,0.09528533617655437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,128,1,float16,fp8,0,0.05818133552869161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,128,1,fp8,fp8,0,0.05382933219273885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,0,0.058229332168896995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,0,1,fp8,fp8,0,0.05414933462937673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,128,1,float16,float16,0,0.05818133552869161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,64,0,1,float16,fp8,0,0.19319466749827066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,128,1,float16,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,128,1,fp8,fp8,0,0.053802669048309326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,0,1,float16,fp8,0,0.06003733476003011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,0,1,fp8,fp8,0,0.05425600210825602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,128,1,float16,float16,0,0.05834133426348368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,0,1,float16,float16,0,0.058415999015172325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,128,1,float16,fp8,0,0.05987200140953064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,128,1,fp8,fp8,0,0.05407999952634176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,0,1,float16,fp8,0,0.05948266883691152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,0,0.05816533168156942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,128,1,float16,float16,0,0.05827199916044871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,0,1,float16,float16,0,0.05825600028038025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,128,1,float16,fp8,0,0.05839466551939646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,128,1,fp8,fp8,0,0.05431999762852987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,0,1,float16,fp8,0,0.058330665032068886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,64,0,1,float16,float16,0,0.05840000013510386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,128,1,float16,float16,0,0.03755199909210205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,0,0.037461332976818085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,128,1,float16,fp8,0,0.037834666669368744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,128,1,fp8,fp8,0,0.033717334270477295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,0,0.03811733424663544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,64,0,1,fp8,fp8,0,0.033728001018365227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,128,1,float16,float16,0,0.03708266715208689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,0,1,float16,float16,0,0.03623999903599421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,128,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,128,1,fp8,fp8,0,0.03372266640265783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,0,1,float16,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,64,0,1,fp8,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,128,1,float16,float16,0,0.03773866593837738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,0,1,float16,float16,0,0.03741333385308584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,128,1,float16,fp8,0,0.0377813329299291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,128,1,fp8,fp8,0,0.03573333223660787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,0,1,float16,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,64,0,1,fp8,fp8,0,0.053786665201187134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,64,0,1,fp8,fp8,0,0.03525333354870478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,128,1,float16,float16,0,0.03781333317359289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,0,1,float16,float16,0,0.03752533346414566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,128,1,float16,fp8,0,0.03740799923737844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,128,1,fp8,fp8,0,0.035317334036032356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,0,1,float16,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,64,0,1,fp8,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,128,1,float16,float16,0,0.02712533374627431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,0,0.027258666853109997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,128,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,128,1,fp8,fp8,0,0.026202666262785595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,64,0,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,128,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,0,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,128,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,128,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,0,1,float16,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,64,0,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,128,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,128,1,float16,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,128,1,fp8,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,0,1,float16,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,64,0,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,128,1,float16,float16,0,0.025605333348115284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,0,1,float16,float16,0,0.025946666797002155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,128,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,0,1,float16,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,64,0,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,0,0.02144533395767212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,128,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,64,0,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,128,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,128,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,128,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,0,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,128,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,0,1,float16,float16,0,0.021482666333516438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,128,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,128,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,0,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,128,1,float16,float16,0,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,0,1,float16,float16,0,0.02075200031201045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,128,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,128,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,64,0,1,float16,float16,0,0.021744000415007275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,128,1,float16,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,0,0.018058666338523228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,0,1,fp8,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,64,0,1,fp8,fp8,0,0.05417066812515259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,128,1,float16,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,128,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,64,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,0,1,float16,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,0,1,fp8,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,128,1,float16,float16,0,0.01838933303952217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,128,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,64,0,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,128,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,64,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,128,1,float16,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,64,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,128,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,128,1,fp8,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,0,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,64,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,128,1,float16,float16,0,0.01669866715868314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,128,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,128,1,float16,float16,0,0.01749333366751671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,128,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,128,1,float16,float16,0,0.015824000040690105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,0,1,float16,float16,0,0.016106666376193363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,128,1,float16,float16,0,0.015674666812022526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,0,1,float16,float16,0,0.015765332927306492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,0,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,64,128,1,float16,float16,0,0.017727999637524288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,128,1,float16,float16,0,0.01562133307258288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,128,1,fp8,fp8,0,0.016623999923467636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,0,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,0,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,64,128,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,float16,0,1.721343994140625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,0,1,float16,float16,0,0.015850666910409927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,fp8,0,1.7336106300354004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,128,1,fp8,fp8,0,1.581567923227946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,float16,0,1.7432907422383626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,float16,0,10.960549672444662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,fp8,0,1.7570026715596516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,fp8,0,10.97052256266276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,0,1,fp8,fp8,0,9.981391906738281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,128,1,fp8,fp8,0,1.6066932678222656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,float16,0,1.7631680170694988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,float16,0,10.96561050415039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,fp8,0,1.7745173772176106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,128,1,fp8,fp8,0,1.6309332847595215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,fp8,0,10.998149871826172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,0,1,fp8,fp8,0,10.007680257161459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,float16,0,11.011647542317709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,float16,0,1.792367935180664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,fp8,0,1.8059786160786946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,128,1,fp8,fp8,0,1.6679147084554036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,0,1,fp8,fp8,0,10.043573379516602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,fp8,0,11.006703694661459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,float16,0,1.0228853225708008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,float16,0,11.072837829589844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,fp8,0,1.0424373149871826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,128,1,fp8,fp8,0,0.9748426278432211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,float16,0,5.715370814005534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,0,1,fp8,fp8,0,10.083599726359049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,fp8,0,11.068127950032553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,float16,0,0.9026186466217041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,fp8,0,0.910261313120524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,fp8,0,5.754330952962239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,0,1,fp8,fp8,0,5.228543917338054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,128,1,fp8,fp8,0,0.8314452966054281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,float16,0,0.9056053161621094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,float16,0,5.5706024169921875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,fp8,0,0.9143413702646891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,0,1,fp8,fp8,0,5.082661310831706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,fp8,0,5.591930389404297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,128,1,fp8,fp8,0,0.8393226464589437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,float16,0,0.9125866889953613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,float16,0,5.580383936564128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,fp8,0,0.9205013116200765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,0,1,fp8,fp8,0,5.098656018575032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,fp8,0,5.581621170043945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,float16,0,5.604037602742513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,128,1,fp8,fp8,0,0.8472959995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,float16,0,0.9268159866333008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,fp8,0,0.9371573130289713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,0,1,fp8,fp8,0,5.098976135253906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,fp8,0,5.610490798950195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,128,1,fp8,fp8,0,0.8657973607381185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,float16,0,0.5603040059407552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,float16,0,5.613722483317058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,fp8,0,0.5721706549326578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,float16,0,2.966831843058268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,fp8,0,5.615781148274739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,128,1,fp8,fp8,0,0.5417706569035848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,float16,0,0.5062133471171061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,0,1,fp8,fp8,0,2.718965212504069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,0,1,fp8,fp8,0,5.126517295837402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,fp8,0,0.5100586811701456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,128,1,fp8,fp8,0,0.4726879994074504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,float16,0,2.9069865544637046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,float16,0,0.5082506736119589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,fp8,0,2.9803466796875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,0,1,fp8,fp8,0,2.652554670969645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,fp8,0,0.5110773245493571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,128,1,fp8,fp8,0,0.47516266504923504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,float16,0,2.9004265467325845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,float16,0,0.510319987932841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,fp8,0,2.897775967915853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,0,1,fp8,fp8,0,2.6559200286865234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,fp8,0,0.5165546735127767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,128,1,fp8,fp8,0,0.48018133640289307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,float16,0,2.900767962137858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,fp8,0,2.8999252319335938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,float16,0,0.5181920131047567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,fp8,0,2.910122553507487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,0,1,fp8,fp8,0,2.6557440757751465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,fp8,0,0.5240373214085897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,128,1,fp8,fp8,0,0.4877653519312541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,float16,0,2.9108479817708335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,float16,0,0.3964960177739461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,fp8,0,0.39682666460673016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,fp8,0,2.917248090108236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,float16,0,1.649888038635254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,128,1,fp8,fp8,0,0.37618664900461835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,float16,0,0.3962080081303914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,fp8,0,1.6506880124409993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,0,1,fp8,fp8,0,1.5118080774943035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,fp8,0,0.39608534177144367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,128,1,fp8,fp8,0,0.3738400141398112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,float16,0,1.6415306727091472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,0,1,fp8,fp8,0,2.6673173904418945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,float16,0,0.39636266231536865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,fp8,0,1.6417439778645833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,fp8,0,0.39688531557718915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,128,1,fp8,fp8,0,0.37375466028849286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,fp8,0,1.6446666717529297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,0,1,fp8,fp8,0,1.5095574061075847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,float16,0,0.39635201295216876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,0,1,fp8,fp8,0,1.509738604227702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,fp8,0,0.39870933691660565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,float16,0,1.644693374633789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,float16,0,1.6435573895772297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,float16,0,0.396565318107605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,fp8,0,1.644320011138916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,0,1,fp8,fp8,0,1.5119039217631023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,fp8,0,0.39636266231536865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,128,1,fp8,fp8,0,0.3742133378982544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,float16,0,1.6464427312215169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,fp8,0,1.6495253245035808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,float16,0,1.2859040101369221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,fp8,0,1.2959787050882976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,0,1,fp8,fp8,0,1.5103519757588704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,128,1,fp8,fp8,0,1.1786080201466878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,128,1,fp8,fp8,0,0.3777066469192505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,float16,0,6.447194417317708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,float16,0,1.2929973602294922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,fp8,0,1.3008639812469482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,0,1,fp8,fp8,0,5.873269399007161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,fp8,0,6.459695816040039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,128,1,fp8,fp8,0,1.1921119689941406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,float16,0,1.3024106820424397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,float16,0,6.461493174235026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,fp8,0,1.3168586889902751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,0,1,fp8,fp8,0,5.8982187906901045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,fp8,0,6.462218602498372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,128,1,fp8,fp8,0,1.2074027061462402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,float16,0,1.3280746936798096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,float16,0,6.466346740722656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,fp8,0,1.3405493100484211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,0,1,fp8,fp8,0,5.903130849202474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,128,1,fp8,fp8,0,1.2371466954549153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,float16,0,6.508015950520833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,fp8,0,6.4968210856119795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,float16,0,0.7674667040506998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,fp8,0,0.7845226923624674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,0,1,fp8,fp8,0,5.934064229329427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,128,1,fp8,fp8,0,0.7327146530151367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,float16,0,3.410949389139811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,float16,0,0.681167999903361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,0,1,fp8,fp8,0,3.12775452931722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,fp8,0,6.51034673055013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,fp8,0,0.6873066425323486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,128,1,fp8,fp8,0,0.6312533219655355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,float16,0,3.3079681396484375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,float16,0,0.6847519874572754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,fp8,0,3.4253387451171875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,0,1,fp8,fp8,0,3.013696034749349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,fp8,0,3.306426684061686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,128,1,fp8,fp8,0,0.6365813414255778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,float16,0,3.307093302408854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,float16,0,0.6896053155263265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,fp8,0,3.3125438690185547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,0,1,fp8,fp8,0,3.018320083618164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,fp8,0,0.6967306931813558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,128,1,fp8,fp8,0,0.6419893503189087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,float16,0,3.319888114929199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,float16,0,0.7007413705190023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,fp8,0,3.321807861328125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,0,1,fp8,fp8,0,3.0265280405680337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,fp8,0,0.7081600030263265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,128,1,fp8,fp8,0,0.655407985051473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,fp8,0,0.6901386578877767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,float16,0,0.4266986846923828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,float16,0,3.3358240127563477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,fp8,0,0.43687466780344647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,0,1,fp8,fp8,0,3.038837432861328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,128,1,fp8,fp8,0,0.4132853349049886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,fp8,0,1.7953279813130696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,fp8,0,3.3388001124064126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,float16,0,1.7813866933186848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,0,1,fp8,fp8,0,1.643071969350179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,float16,0,0.3850666681925456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,fp8,0,0.39006932576497394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,128,1,fp8,fp8,0,0.361189325650533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,float16,0,1.7329546610514324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,float16,0,0.3867199818293254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,fp8,0,1.7366827328999836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,fp8,0,0.3912159999211629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,128,1,fp8,fp8,0,0.36343999703725177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,fp8,0,1.741194725036621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,0,1,fp8,fp8,0,1.5922400156656902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,float16,0,0.3924266497294108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,fp8,0,0.3946293195088704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,float16,0,1.7351786295572917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,float16,0,1.740938663482666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,0,1,fp8,fp8,0,1.5922986666361492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,fp8,0,1.744490623474121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,0,1,fp8,fp8,0,1.5985493659973145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,float16,0,0.39578131834665936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,fp8,0,0.4002186854680379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,128,1,fp8,fp8,0,0.3736906846364339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,128,1,fp8,fp8,0,0.36742401123046875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,float16,0,1.7484265963236492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,float16,0,0.30425065755844116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,fp8,0,1.7565919558207195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,float16,0,1.0226506392161052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,128,1,fp8,fp8,0,0.28761066993077594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,fp8,0,1.023525317509969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,0,1,fp8,fp8,0,0.9399680296579996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,float16,0,0.3004159927368164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,fp8,0,0.3016960024833679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,float16,0,1.0162933667500813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,128,1,fp8,fp8,0,0.2855093280474345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,fp8,0,1.0176533063252766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,0,1,fp8,fp8,0,0.9355466365814209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,float16,0,0.301962673664093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,fp8,0,0.3020000060399373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,float16,0,1.016874631245931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,fp8,0,0.30453334252039593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,fp8,0,1.0171466668446858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,0,1,fp8,fp8,0,0.9372586409250895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,float16,0,0.3020053307215373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,fp8,0,0.3020426630973816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,float16,0,1.0173439979553223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,128,1,fp8,fp8,0,0.28656532367070514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,128,1,fp8,fp8,0,0.2852746645609538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,fp8,0,1.0179039637247722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,0,1,fp8,fp8,0,0.93831467628479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,fp8,0,0.3041173418362935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,float16,0,1.0197227001190186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,128,1,fp8,fp8,0,0.28683199485143024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,fp8,0,1.022981325785319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,0,1,fp8,fp8,0,0.9363786379496256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,float16,0,0.30260799328486127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,float16,0,1.0728800296783447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,fp8,0,1.0787359873453777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,128,1,fp8,fp8,0,0.9825812975565592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,0,1,fp8,fp8,0,1.6025066375732422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,float16,0,1.078554630279541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,fp8,0,4.64576530456543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,0,1,fp8,fp8,0,4.2311201095581055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,float16,0,4.636250813802083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,128,1,fp8,fp8,0,0.9917013645172119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,float16,0,4.638223965962728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,float16,0,1.0853652954101562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,fp8,0,1.0864373048146565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,0,1,fp8,fp8,0,4.240533192952474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,fp8,0,4.653327941894531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,fp8,0,1.0957280000050862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,128,1,fp8,fp8,0,1.0027039845784504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,float16,0,1.1043946743011475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,0,1,fp8,fp8,0,4.253093401590983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,fp8,0,4.666357358296712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,fp8,0,1.1156799793243408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,float16,0,4.683578809102376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,float16,0,4.649669329325358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,float16,0,0.6408053239186605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,fp8,0,4.71610673268636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,0,1,fp8,fp8,0,4.276037216186523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,128,1,fp8,fp8,0,0.6147253513336182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,float16,0,2.4706880251566568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,fp8,0,0.6527040004730225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,fp8,0,2.487237294514974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,0,1,fp8,fp8,0,2.2671093940734863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,float16,0,0.5681013266245524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,fp8,0,0.5740640163421631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,128,1,fp8,fp8,0,0.5274773438771566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,float16,0,2.3871092796325684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,float16,0,0.5722986857096354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,fp8,0,2.388821283976237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,0,1,fp8,fp8,0,2.183135986328125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,fp8,0,0.5765440066655477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,128,1,fp8,fp8,0,0.5307413339614868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,float16,0,2.391109307607015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,float16,0,0.5784586668014526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,fp8,0,2.39191468556722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,fp8,0,0.583354671796163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,float16,0,2.397333304087321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,128,1,fp8,fp8,0,0.537610650062561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,fp8,0,2.4047199885050454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,0,1,fp8,fp8,0,2.1875573794047036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,float16,0,0.586079994837443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,0,1,fp8,fp8,0,2.1923893292744956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,fp8,0,0.5941173235575358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,128,1,fp8,fp8,0,0.5484373172124227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,128,1,fp8,fp8,0,1.025210698445638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,float16,0,2.4106027285257974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,float16,0,0.3574399948120117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,fp8,0,0.3659093379974365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,0,1,fp8,fp8,0,2.203765392303467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,128,1,fp8,fp8,0,0.34673599402109784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,fp8,0,1.3157386779785156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,0,1,fp8,fp8,0,1.201370636622111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,float16,0,0.32052799065907794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,fp8,0,0.3214346567789714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,float16,0,1.306831995646159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,float16,0,1.2604479789733887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,fp8,0,1.2626346747080486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,0,1,fp8,fp8,0,1.16157333056132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,float16,0,0.3205333352088928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,fp8,0,0.3323093255360921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,float16,0,1.2639626661936443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,128,1,fp8,fp8,0,0.3023040095965068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,fp8,0,1.265221357345581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,0,1,fp8,fp8,0,1.1622186501820881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,float16,0,0.32445865869522095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,fp8,0,0.326693336168925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,fp8,0,2.4129066467285156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,float16,0,1.2703839937845867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,128,1,fp8,fp8,0,0.3064799904823303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,float16,0,0.33053332567214966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,fp8,0,1.271663983662923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,0,1,fp8,fp8,0,1.1679306825002034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,fp8,0,0.33480532964070636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,128,1,fp8,fp8,0,0.31409599383672077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,float16,0,1.2733813126881917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,float16,0,0.25651200612386066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,fp8,0,1.2779413064320881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,fp8,0,0.2581226627031962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,128,1,fp8,fp8,0,0.24266133705774942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,fp8,0,0.7630826632181803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,0,1,fp8,fp8,0,1.1721173127492268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,float16,0,0.762336015701294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,float16,0,0.7565279801686605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,fp8,0,0.25276799996693927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,128,1,fp8,fp8,0,0.24087466796239218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,128,1,fp8,fp8,0,0.3033653299013774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,float16,0,0.2532586654027303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,0,1,fp8,fp8,0,0.6973919868469238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,float16,0,0.2547893325487773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,fp8,0,0.2546666661898295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,float16,0,0.7562399705251058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,128,1,fp8,fp8,0,0.2419253389040629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,fp8,0,0.7585386435190836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,0,1,fp8,fp8,0,0.7022773424784342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,0,1,fp8,fp8,0,0.699455976486206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,float16,0,0.252895991007487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,fp8,0,0.25432000557581586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,float16,0,0.7579200267791748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,fp8,0,0.75764266649882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,0,1,fp8,fp8,0,0.7001653512318929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,float16,0,0.2547786633173625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,fp8,0,0.25492266813913983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,128,1,fp8,fp8,0,0.24090667565663657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,128,1,fp8,fp8,0,0.24038932720820108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,fp8,0,0.7588746547698975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,0,1,fp8,fp8,0,0.6985599994659424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,float16,0,0.7587359746297201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,float16,0,1.6770240465799968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,fp8,0,0.7566239833831787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,fp8,0,1.6876586278279622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,128,1,fp8,fp8,0,1.5351840655008953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,float16,0,1.6997973124186199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,float16,0,6.145072301228841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,fp8,0,6.150165557861328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,fp8,0,1.713370641072591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,0,1,fp8,fp8,0,5.592645645141602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,128,1,fp8,fp8,0,1.5612907409667969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,float16,0,6.1764265696207685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,float16,0,1.7146080334981282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,fp8,0,1.7265067100524902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,0,1,fp8,fp8,0,5.622581481933594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,128,1,fp8,fp8,0,1.58243195215861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,float16,0,6.205365498860677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,float16,0,1.751157283782959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,fp8,0,6.201007843017578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,fp8,0,6.187072118123372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,0,1,fp8,fp8,0,5.642394383748372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,fp8,0,1.7613439559936523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,128,1,fp8,fp8,0,1.6190452575683594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,float16,0,0.9773920377095541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,float16,0,6.247754414876302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,fp8,0,0.9972426891326904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,fp8,0,6.253760019938151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,float16,0,3.2641814549764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,0,1,fp8,fp8,0,5.6839040120442705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,128,1,fp8,fp8,0,0.9286613464355469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,float16,0,0.8573546409606934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,fp8,0,3.282714525858561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,fp8,0,0.8641546567281088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,float16,0,3.1165119806925454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,128,1,fp8,fp8,0,0.7863039970397949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,0,1,fp8,fp8,0,2.9924214680989585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,float16,0,0.863205353418986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,fp8,0,3.12500794728597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,0,1,fp8,fp8,0,2.8459145228068032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,128,1,fp8,fp8,0,0.7957706451416016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,float16,0,3.129434585571289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,fp8,0,0.8695147037506104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,0,1,fp8,fp8,0,2.849498748779297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,fp8,0,0.8773279984792074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,128,1,fp8,fp8,0,0.8024640083312988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,float16,0,3.1419251759847007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,fp8,0,3.1413440704345703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,0,1,fp8,fp8,0,2.8613014221191406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,fp8,0,3.134645462036133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,float16,0,0.8825493653615316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,fp8,0,0.8935519854227701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,128,1,fp8,fp8,0,0.8216586907704672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,float16,0,3.165893236796061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,float16,0,0.8723413149515787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,float16,0,0.5147680044174194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,fp8,0,3.1662025451660156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,float16,0,1.6818559964497883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,fp8,0,1.6966346104939778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,0,1,fp8,fp8,0,2.876650810241699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,128,1,fp8,fp8,0,0.4939200083414714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,0,1,fp8,fp8,0,1.5515519777933757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,fp8,0,0.46004267533620197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,float16,0,1.613386631011963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,fp8,0,0.5262506802876791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,float16,0,0.4575413465499878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,fp8,0,1.6185173988342285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,float16,0,0.4609440167744954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,fp8,0,0.46426665782928467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,128,1,fp8,fp8,0,0.42473065853118896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,float16,0,1.6184906959533691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,128,1,fp8,fp8,0,0.43009066581726074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,float16,0,0.46434664726257324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,fp8,0,1.6192213694254558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,0,1,fp8,fp8,0,1.4808160463968914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,0,1,fp8,fp8,0,1.4764053026835124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,fp8,0,0.46832001209259033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,128,1,fp8,fp8,0,0.4330506722132365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,float16,0,1.6230506896972656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,float16,0,0.47084800402323407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,fp8,0,1.6307679812113445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,0,1,fp8,fp8,0,1.4893120129903157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,fp8,0,0.4761226574579875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,128,1,fp8,fp8,0,0.4416586558024089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,float16,0,0.28945066531499225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,fp8,0,1.6405653953552246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,float16,0,0.9003946781158447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,128,1,fp8,fp8,0,0.2815413276354472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,float16,0,1.6324159304300945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,fp8,0,0.908618688583374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,float16,0,0.259061336517334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,0,1,fp8,fp8,0,0.8326666355133057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,fp8,0,0.2592159907023112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,float16,0,0.86516801516215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,128,1,fp8,fp8,0,0.24625066916147867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,fp8,0,0.8655573527018229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,0,1,fp8,fp8,0,0.7969653606414795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,float16,0,0.2593653400739034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,0,1,fp8,fp8,0,1.4950453440348308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,fp8,0,0.2959360082944234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,fp8,0,0.866592009862264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,0,1,fp8,fp8,0,0.8161546389261881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,fp8,0,0.26097599665323895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,float16,0,0.2632266680399577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,128,1,fp8,fp8,0,0.24712532758712769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,fp8,0,0.26498132944107056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,float16,0,0.8677386442820231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,fp8,0,0.8735146522521973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,0,1,fp8,fp8,0,0.8027573426564535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,float16,0,0.26740799347559613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,fp8,0,0.27109332879384357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,float16,0,0.8767946561177572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,128,1,fp8,fp8,0,0.2635306715965271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,128,1,fp8,fp8,0,0.24972800413767496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,0,1,fp8,fp8,0,0.8041439851125082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,float16,0,0.539685328801473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,fp8,0,0.20703999201456705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,float16,0,0.8662239710489908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,128,1,fp8,fp8,0,0.19614400466283163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,fp8,0,0.5405333439509074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,0,1,fp8,fp8,0,0.49858665466308594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,float16,0,0.2034506599108378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,float16,0,0.2078346610069275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,fp8,0,0.20282665888468424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,128,1,fp8,fp8,0,0.19340799252192178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,fp8,0,0.5334453185399374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,0,1,fp8,fp8,0,0.49456000328063965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,float16,0,0.2036479910214742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,fp8,0,0.20364266633987427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,128,1,fp8,fp8,0,0.193338672320048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,float16,0,0.5336800018946329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,float16,0,0.5347520112991333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,fp8,0,0.880682627360026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,0,1,fp8,fp8,0,0.49583999315897626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,float16,0,0.2036799987157186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,fp8,0,0.5344320138295492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,float16,0,0.5376586516698202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,128,1,fp8,fp8,0,0.193557341893514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,fp8,0,0.5367733240127563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,0,1,fp8,fp8,0,0.49451200167338055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,float16,0,0.2039946715037028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,float16,0,0.536512017250061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,128,1,fp8,fp8,0,0.19523733854293823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,fp8,0,0.20387200514475504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,0,1,fp8,fp8,0,0.49737600485483807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,float16,0,1.249008019765218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,fp8,0,0.2060640056927999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,fp8,0,1.2595573266347249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,fp8,0,0.53657599290212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,128,1,fp8,fp8,0,1.1382880210876465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,float16,0,3.7168639500935874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,float16,0,1.259050687154134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,fp8,0,1.2676693598429363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,0,1,fp8,fp8,0,3.3747361501057944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,128,1,fp8,fp8,0,1.156874656677246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,float16,0,3.729519844055176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,float16,0,1.2718453407287598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,fp8,0,3.725482622782389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,fp8,0,3.7323732376098633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,0,1,fp8,fp8,0,3.3913065592447915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,fp8,0,1.2797599633534749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,128,1,fp8,fp8,0,1.170250654220581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,float16,0,1.2955573399861653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,float16,0,3.742117245992025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,0,1,fp8,fp8,0,3.4117228190104165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,fp8,0,1.3075733184814453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,128,1,fp8,fp8,0,1.202618678410848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,float16,0,3.766122817993164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,float16,0,0.7377653121948242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,fp8,0,3.7842025756835938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,0,1,fp8,fp8,0,3.44376532236735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,float16,0,1.9987732569376628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,128,1,fp8,fp8,0,0.6993546485900879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,fp8,0,3.745888074239095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,fp8,0,2.0126773516337075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,0,1,fp8,fp8,0,1.8408106168111165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,fp8,0,0.7522079944610596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,fp8,0,0.651807983716329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,128,1,fp8,fp8,0,0.5953546762466431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,float16,0,1.894821325937907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,float16,0,0.6518613497416178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,float16,0,0.6457653443018595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,fp8,0,1.9052693049112956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,fp8,0,0.6564693450927734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,128,1,fp8,fp8,0,0.6026293436686198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,float16,0,1.9011306762695312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,float16,0,0.657034675280253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,0,1,fp8,fp8,0,1.738800048828125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,0,1,fp8,fp8,0,1.7335626284281414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,fp8,0,0.6626400152842203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,128,1,fp8,fp8,0,0.6073013146718343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,float16,0,1.9088373184204102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,float16,0,0.6680693626403809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,fp8,0,1.9190293947855632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,0,1,fp8,fp8,0,1.7449866930643718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,fp8,0,0.6752053101857504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,128,1,fp8,fp8,0,0.6203093528747559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,float16,0,1.9225600560506184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,float16,0,0.3914399941762288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,fp8,0,1.93395201365153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,fp8,0,1.9079200426737468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,fp8,0,0.4020586808522542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,128,1,fp8,fp8,0,0.37573333581288654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,fp8,0,1.0538880030314128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,0,1,fp8,fp8,0,0.9602933724721273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,float16,0,0.34645334879557294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,fp8,0,0.3490613301595052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,float16,0,0.989786704381307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,float16,0,1.0413119792938232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,fp8,0,0.9930933316548666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,0,1,fp8,fp8,0,0.9114933013916016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,float16,0,0.34882664680480957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,fp8,0,0.3530879815419515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,0,1,fp8,fp8,0,1.7588159243265789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,128,1,fp8,fp8,0,0.32867199182510376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,128,1,fp8,fp8,0,0.3245120048522949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,fp8,0,0.9963680108388265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,0,1,fp8,fp8,0,0.9128639698028564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,fp8,0,0.35607465108235675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,float16,0,1.0007359981536865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,128,1,fp8,fp8,0,0.33105599880218506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,float16,0,0.9940053621927897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,float16,0,0.36111998558044434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,0,1,fp8,fp8,0,0.9169812997182211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,float16,0,0.35492265224456787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,float16,0,1.0069493452707927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,128,1,fp8,fp8,0,0.3372533321380615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,fp8,0,1.0205386479695637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,0,1,fp8,fp8,0,0.9225546518961588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,float16,0,0.22409600019454956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,fp8,0,1.003983974456787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,fp8,0,0.23036799828211466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,128,1,fp8,fp8,0,0.21875733137130737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,fp8,0,0.3643840154012044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,fp8,0,0.5724320014317831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,float16,0,0.19944000244140625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,float16,0,0.5396906534830729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,float16,0,0.5683466593424479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,128,1,fp8,fp8,0,0.18935465812683105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,fp8,0,0.5392160018285116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,0,1,fp8,fp8,0,0.4977333148320516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,float16,0,0.198362668355306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,fp8,0,0.19937600692113241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,float16,0,0.5418826738993326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,fp8,0,0.20113599300384521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,128,1,fp8,fp8,0,0.19152534008026123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,float16,0,0.2017013430595398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,0,1,fp8,fp8,0,0.5009493430455526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,0,1,fp8,fp8,0,0.5272800127665201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,fp8,0,0.2040533423423767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,128,1,fp8,fp8,0,0.19193599621454874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,fp8,0,0.5442453225453695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,float16,0,0.20472532510757446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,float16,0,0.5476213296254476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,fp8,0,0.5429173310597738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,fp8,0,0.2057173252105713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,128,1,fp8,fp8,0,0.19737066825230917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,float16,0,0.5429759820302328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,fp8,0,0.5495893160502116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,float16,0,0.162800004084905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,0,1,fp8,fp8,0,0.5019199848175049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,fp8,0,0.16245866815249124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,128,1,fp8,fp8,0,0.1541920006275177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,fp8,0,0.35250667730967206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,0,1,fp8,fp8,0,0.3263840079307556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,float16,0,0.1586026648680369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,float16,0,0.34918399651845294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,128,1,fp8,fp8,0,0.15244799852371216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,fp8,0,0.3492799997329712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,float16,0,0.35228268305460614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,0,1,fp8,fp8,0,0.3227360049883525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,float16,0,0.15979199608167013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,float16,0,0.34832000732421875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,128,1,fp8,fp8,0,0.15037332971890768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,fp8,0,0.15878933668136597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,fp8,0,0.3521226644515991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,0,1,fp8,fp8,0,0.3245226740837097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,0,1,fp8,fp8,0,0.5068639914194742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,float16,0,0.3489760160446167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,fp8,0,0.15826666355133057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,128,1,fp8,fp8,0,0.1506613294283549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,fp8,0,0.34902934233347577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,0,1,fp8,fp8,0,0.32785600423812866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,float16,0,0.1588106652100881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,float16,0,0.3500746488571167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,fp8,0,0.15877333283424377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,128,1,fp8,fp8,0,0.15269333124160767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,fp8,0,0.3490133285522461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,0,1,fp8,fp8,0,0.33050666252772015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,fp8,0,0.16037333011627197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,float16,0,0.1593706707159678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,fp8,0,1.663589318593343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,float16,0,3.715221405029297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,128,1,fp8,fp8,0,1.5062294006347656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,float16,0,1.6751573880513508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,0,1,fp8,fp8,0,3.3783413569132485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,float16,0,1.6488107045491536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,fp8,0,1.6872800191243489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,128,1,fp8,fp8,0,1.5359039306640625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,fp8,0,3.731594721476237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,float16,0,1.6899305979410808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,fp8,0,3.7513599395751953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,0,1,fp8,fp8,0,3.412597338358561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,float16,0,3.7383200327555337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,fp8,0,1.7035306294759114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,128,1,fp8,fp8,0,1.55731201171875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,float16,0,3.764677365620931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,float16,0,1.727952003479004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,fp8,0,3.7692693074544272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,0,1,fp8,fp8,0,3.426447868347168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,fp8,0,1.7420159975687664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,128,1,fp8,fp8,0,1.5979679425557454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,float16,0,3.8021440505981445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,float16,0,0.9563093185424805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,fp8,0,0.9716320037841797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,float16,0,2.0139147440592446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,0,1,fp8,fp8,0,3.475914637247721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,128,1,fp8,fp8,0,0.9046453634897867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,float16,0,0.8328213691711426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,0,1,fp8,fp8,0,1.8577440579732258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,fp8,0,3.8098986943562827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,fp8,0,0.8408053716023763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,float16,0,1.8764692942301433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,fp8,0,2.030229409535726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,fp8,0,1.8884213765462239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,0,1,fp8,fp8,0,1.7100159327189128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,float16,0,0.8382346630096436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,fp8,0,0.8474187056223551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,128,1,fp8,fp8,0,0.7651253541310629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,128,1,fp8,fp8,0,0.7725706895192465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,float16,0,1.8894453048706055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,float16,0,0.8470933437347412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,fp8,0,1.8928267161051433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,0,1,fp8,fp8,0,1.7177492777506511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,fp8,0,0.8549386660257975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,float16,0,1.8952053387959797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,fp8,0,1.9004480044047039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,0,1,fp8,fp8,0,1.7292799949645996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,float16,0,0.861519972483317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,fp8,0,0.8716959953308105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,128,1,fp8,fp8,0,0.798362652460734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,fp8,0,1.9212160110473633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,float16,0,0.49853865305582684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,0,1,fp8,fp8,0,1.748634656270345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,float16,0,1.9136959711710613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,fp8,0,0.5053120056788126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,float16,0,1.0367999871571858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,fp8,0,1.054853359858195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,0,1,fp8,fp8,0,0.9582506815592448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,float16,0,0.4336479902267456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,float16,0,0.9695146878560384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,128,1,fp8,fp8,0,0.4028480052947998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,128,1,fp8,fp8,0,0.47202134132385254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,fp8,0,0.973311980565389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,128,1,fp8,fp8,0,0.7803626855214437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,float16,0,0.43795732657114667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,fp8,0,0.44810132185618085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,128,1,fp8,fp8,0,0.4065599838892619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,0,1,fp8,fp8,0,0.8879413604736328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,0,1,fp8,fp8,0,0.891599973042806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,float16,0,0.9726880391438802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,float16,0,0.4426453510920207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,float16,0,0.978378693262736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,fp8,0,0.9794133504231771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,fp8,0,0.9826719760894775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,fp8,0,0.4410133361816406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,float16,0,0.44898664951324463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,0,1,fp8,fp8,0,0.8970080216725668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,fp8,0,0.44731199741363525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,fp8,0,0.4638346831003825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,128,1,fp8,fp8,0,0.41068800290425617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,fp8,0,0.9930400053660074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,0,1,fp8,fp8,0,0.9035840034484863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,float16,0,0.2664693395296733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,float16,0,0.5502346754074097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,fp8,0,0.272490660349528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,128,1,fp8,fp8,0,0.25774399439493817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,fp8,0,0.556112011273702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,128,1,fp8,fp8,0,0.418720006942749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,float16,0,0.23083732525507608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,fp8,0,0.2344320019086202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,128,1,fp8,fp8,0,0.2220053275426229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,fp8,0,0.515775998433431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,0,1,fp8,fp8,0,0.4774080117543538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,0,1,fp8,fp8,0,0.513701319694519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,float16,0,0.5123146772384644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,fp8,0,0.2364586591720581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,128,1,fp8,fp8,0,0.2237386703491211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,fp8,0,0.5179733435312907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,float16,0,0.9869600137074789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,float16,0,0.23665066560109457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,float16,0,0.5133653481801351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,fp8,0,0.246943990389506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,128,1,fp8,fp8,0,0.22603732347488403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,fp8,0,0.5169653495152792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,float16,0,0.23425066471099854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,float16,0,0.24232532580693564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,float16,0,0.5175413290659586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,fp8,0,0.24653865893681845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,128,1,fp8,fp8,0,0.2311840057373047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,fp8,0,0.5273760159810384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,0,1,fp8,fp8,0,0.48425066471099854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,float16,0,0.15197867155075073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,float16,0,0.308133323987325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,fp8,0,0.15620799859364828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,128,1,fp8,fp8,0,0.15063466628392538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,0,1,fp8,fp8,0,0.47764265537261963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,0,1,fp8,fp8,0,0.2896639903386434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,float16,0,0.13422399759292603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,float16,0,0.523423989613851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,float16,0,0.28943467140197754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,128,1,fp8,fp8,0,0.12574399511019388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,fp8,0,0.28991466760635376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,0,1,fp8,fp8,0,0.26532800992329914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,float16,0,0.13607466220855713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,float16,0,0.2887413303057353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,fp8,0,0.13436800241470337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,fp8,0,0.1337279975414276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,128,1,fp8,fp8,0,0.12571199735005698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,fp8,0,0.2885706623395284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,0,1,fp8,fp8,0,0.2653440038363139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,float16,0,0.13395200173060098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,float16,0,0.29000532627105713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,fp8,0,0.13609066605567932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,128,1,fp8,fp8,0,0.1283146639664968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,fp8,0,0.2900373339653015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,0,1,fp8,fp8,0,0.2674880027770996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,float16,0,0.13615999619166055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,float16,0,0.2916799982388814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,128,1,fp8,fp8,0,0.13498666882514954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,fp8,0,0.29397332668304443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,0,1,fp8,fp8,0,0.4821653366088867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,0,1,fp8,fp8,0,0.2754720052083333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,float16,0,0.11294933160146077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,fp8,0,0.13768000404040018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,float16,0,0.20430399974187216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,fp8,0,0.10982400178909302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,128,1,fp8,fp8,0,0.10769066214561462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,fp8,0,0.20146665970484415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,0,1,fp8,fp8,0,0.18881599108378092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,float16,0,0.11149332920710246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,float16,0,0.2014346718788147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,fp8,0,0.10951466361681621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,128,1,fp8,fp8,0,0.10664000113805135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,fp8,0,0.19944000244140625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,0,1,fp8,fp8,0,0.18742932875951132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,float16,0,0.10937600334485371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,float16,0,0.1996906598409017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,fp8,0,0.10949333508809407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,128,1,fp8,fp8,0,0.10553600390752156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,fp8,0,0.19989866018295288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,0,1,fp8,fp8,0,0.18715200821558634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,float16,0,0.11142933368682861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,float16,0,0.20084800322850546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,128,1,fp8,fp8,0,0.10749866565068562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,fp8,0,0.3123626708984375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,0,1,fp8,fp8,0,0.18758400281270346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,float16,0,0.11151466766993205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,float16,0,0.20126400391260782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,128,1,fp8,fp8,0,0.10526399811108907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,fp8,0,0.19994133710861206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,0,1,fp8,fp8,0,0.18650132417678833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,fp8,0,0.11147733529408772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,fp8,0,0.20190932353337607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,float16,0,1.2310187021891277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,fp8,0,0.11142399907112122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,fp8,0,1.2424266338348389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,128,1,fp8,fp8,0,1.12336532274882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,fp8,0,2.339381376902262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,0,1,fp8,fp8,0,2.1168266932169595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,float16,0,2.3349173863728843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,fp8,0,1.2524693012237549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,float16,0,2.342954635620117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,128,1,fp8,fp8,0,1.1386666297912598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,fp8,0,2.3549866676330566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,0,1,fp8,fp8,0,2.1339359283447266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,fp8,0,1.2657653490702312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,float16,0,1.2444533507029216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,float16,0,2.361402670542399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,128,1,fp8,fp8,0,1.1549546718597412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,float16,0,1.2552053133646648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,fp8,0,2.3657546043395996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,float16,0,1.2799519697825115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,float16,0,2.388026714324951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,128,1,fp8,fp8,0,1.183461348215739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,0,1,fp8,fp8,0,2.1540053685506186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,fp8,0,2.3971254030863443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,float16,0,0.7209973335266113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,fp8,0,1.2888533274332683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,float16,0,1.2886559963226318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,128,1,fp8,fp8,0,0.683690627415975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,fp8,0,1.3004000186920166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,0,1,fp8,fp8,0,1.192639986673991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,float16,0,0.6294399897257487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,float16,0,1.1883412996927898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,fp8,0,0.7321279843648275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,128,1,fp8,fp8,0,0.577616016070048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,fp8,0,1.1924693584442139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,0,1,fp8,fp8,0,2.1825173695882163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,0,1,fp8,fp8,0,1.0832160313924153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,float16,0,0.6338026523590088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,fp8,0,0.6395893494288126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,128,1,fp8,fp8,0,0.5948426723480225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,fp8,0,1.1988639831542969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,0,1,fp8,fp8,0,1.086682637532552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,float16,0,0.6396533250808716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,float16,0,1.2016373475392659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,fp8,0,0.6464853286743164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,fp8,0,0.6461973190307617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,128,1,fp8,fp8,0,0.6088853279749552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,fp8,0,1.2061013380686443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,float16,0,0.651306668917338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,fp8,0,0.6790560086568197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,float16,0,1.213146686553955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,float16,0,1.1931839783986409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,128,1,fp8,fp8,0,0.6032960017522176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,float16,0,0.3775413433710734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,fp8,0,1.2210079828898113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,0,1,fp8,fp8,0,1.1076373259226482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,0,1,fp8,fp8,0,1.0956693490346272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,fp8,0,0.38397332032521564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,128,1,fp8,fp8,0,0.3800906737645467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,fp8,0,0.6749973297119141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,float16,0,0.3285280068715413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,fp8,0,0.33101866642634076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,128,1,fp8,fp8,0,0.3062613407770793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,fp8,0,0.6196693181991577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,float16,0,0.6690346399943033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,0,1,fp8,fp8,0,0.5693493286768595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,0,1,fp8,fp8,0,0.6215360164642334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,float16,0,0.6177440087000529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,float16,0,0.33240532875061035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,128,1,fp8,fp8,0,0.31165866057078045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,float16,0,0.6208373308181763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,fp8,0,0.6219573418299357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,0,1,fp8,fp8,0,0.5719840129216512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,float16,0,0.33558400472005206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,float16,0,0.6257280111312866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,fp8,0,0.3390880028406779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,128,1,fp8,fp8,0,0.3132586677869161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,fp8,0,0.6279413302739462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,fp8,0,0.3357173204421997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,0,1,fp8,fp8,0,0.5748639901479086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,float16,0,0.3434079885482788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,fp8,0,0.34624000390370685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,128,1,fp8,fp8,0,0.3195626735687256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,fp8,0,0.6366879940032959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,float16,0,0.20368534326553345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,float16,0,0.3593013286590576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,128,1,fp8,fp8,0,0.20427733659744263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,fp8,0,0.36403199036916095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,0,1,fp8,fp8,0,0.33902935187021893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,float16,0,0.6318346659342448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,float16,0,0.17719467480977377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,float16,0,0.3290773431460063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,128,1,fp8,fp8,0,0.16960533459981283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,fp8,0,0.34401599566141766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,0,1,fp8,fp8,0,0.30850134293238324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,float16,0,0.17841066916783652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,float16,0,0.33111466964085895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,fp8,0,0.17856534322102866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,128,1,fp8,fp8,0,0.17087467511494955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,fp8,0,0.3319999972979228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,0,1,fp8,fp8,0,0.310261329015096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,float16,0,0.18092799186706543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,float16,0,0.3325706720352173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,0,1,fp8,fp8,0,0.5811359882354736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,fp8,0,0.20882133642832437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,fp8,0,0.3345706860224406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,0,1,fp8,fp8,0,0.3123146692911784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,float16,0,0.18689600626627603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,float16,0,0.3391253153483073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,fp8,0,0.18125865856806436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,128,1,fp8,fp8,0,0.17323199907938638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,fp8,0,0.33933866024017334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,0,1,fp8,fp8,0,0.31617599725723267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,float16,0,0.11788800358772278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,float16,0,0.20457067092259726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,fp8,0,0.12146133184432983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,fp8,0,0.18173333009084067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,128,1,fp8,fp8,0,0.11758933464686076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,128,1,fp8,fp8,0,0.17961066961288452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,0,1,fp8,fp8,0,0.19549334049224854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,float16,0,0.10684266686439514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,float16,0,0.19219734271367392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,128,1,fp8,fp8,0,0.09935999910036723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,fp8,0,0.19350399573644003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,0,1,fp8,fp8,0,0.17749333381652832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,float16,0,0.10665067036946614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,fp8,0,0.18844799200693765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,float16,0,0.19149333238601685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,fp8,0,0.20642133553822836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,128,1,fp8,fp8,0,0.09919466574986775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,fp8,0,0.191429336865743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,0,1,fp8,fp8,0,0.17733333508173624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,float16,0,0.1074666678905487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,float16,0,0.19171200195948282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,fp8,0,0.10761599739392598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,128,1,fp8,fp8,0,0.09914132952690125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,fp8,0,0.1913386583328247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,0,1,fp8,fp8,0,0.17723733186721802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,float16,0,0.10667199889818828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,float16,0,0.19161067406336466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,fp8,0,0.10873599847157796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,128,1,fp8,fp8,0,0.10326932867368062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,fp8,0,0.19339734315872192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,0,1,fp8,fp8,0,0.18173867464065552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,fp8,0,0.10645332932472229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,fp8,0,0.0867786705493927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,128,1,fp8,fp8,0,0.08244266609350841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,fp8,0,0.1398240029811859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,0,1,fp8,fp8,0,0.13184000054995218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,float16,0,0.0867680013179779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,fp8,0,0.10774399836858113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,float16,0,0.13834133744239807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,fp8,0,0.08542933066685994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,float16,0,0.08691199620564778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,fp8,0,0.13803733388582864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,0,1,fp8,fp8,0,0.13035200039545694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,float16,0,0.08695466319719951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,float16,0,0.13833066821098328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,fp8,0,0.08687999844551086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,128,1,fp8,fp8,0,0.08270933230717976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,fp8,0,0.13833600282669067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,0,1,fp8,fp8,0,0.12999467055002847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,float16,0,0.084906667470932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,float16,0,0.13821333646774292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,fp8,0,0.08624000350634257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,128,1,fp8,fp8,0,0.08286933104197185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,fp8,0,0.1379680037498474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,0,1,fp8,fp8,0,0.1306986709435781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,float16,0,0.0867786705493927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,float16,0,0.13969600200653076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,fp8,0,0.08698667089144389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,128,1,fp8,fp8,0,0.08292266726493835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,fp8,0,0.13897066315015158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,0,1,fp8,fp8,0,0.12986133495966592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,128,1,fp8,fp8,0,0.08351999521255493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,float16,0,0.13859732945760092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,float16,0,1.656394640604655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,fp8,0,1.65666659673055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,128,1,fp8,fp8,0,1.4895040194193523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,fp8,0,2.5202560424804688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,float16,0,1.6806666056315105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,float16,0,2.514832019805908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,fp8,0,1.6817173957824707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,128,1,fp8,fp8,0,1.5067253112792969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,0,1,fp8,fp8,0,2.264293352762858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,fp8,0,2.54146671295166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,float16,0,1.712842623392741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,float16,0,2.5425119400024414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,fp8,0,1.706208070119222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,128,1,fp8,fp8,0,1.5257813135782878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,0,1,fp8,fp8,0,2.280245304107666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,fp8,0,2.5677013397216797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,0,1,fp8,fp8,0,2.3036319414774575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,float16,0,1.7492000261942546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,float16,0,2.5731627146402993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,fp8,0,1.7413652737935383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,float16,0,2.613589286804199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,fp8,0,2.6064000129699707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,float16,0,0.9507359663645426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,0,1,fp8,fp8,0,2.336693286895752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,fp8,0,0.9599733352661133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,float16,0,1.394821325937907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,128,1,fp8,fp8,0,0.9127893447875977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,fp8,0,1.4021652539571126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,0,1,fp8,fp8,0,1.2921226819356282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,float16,0,0.8228853543599447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,fp8,0,0.8288586934407552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,float16,0,1.256815989812215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,128,1,fp8,fp8,0,1.5616374015808105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,fp8,0,1.2632746696472168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,0,1,fp8,fp8,0,1.1431466738382976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,float16,0,0.8268586794535319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,fp8,0,0.8369226455688477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,float16,0,1.2657333215077717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,128,1,fp8,fp8,0,0.7516853014628092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,fp8,0,1.273637294769287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,float16,0,0.8378240267435709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,fp8,0,0.8452906608581543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,float16,0,1.2735573450724285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,128,1,fp8,fp8,0,0.7591679890950521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,128,1,fp8,fp8,0,0.7697546482086182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,fp8,0,1.2813866933186848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,0,1,fp8,fp8,0,1.1623786290486653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,float16,0,0.8530026276906332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,fp8,0,0.8616586526234945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,float16,0,1.2903680006663005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,fp8,0,1.29857603708903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,0,1,fp8,fp8,0,1.1534720261891682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,float16,0,0.7134933471679688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,128,1,fp8,fp8,0,0.7857333024342855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,fp8,0,0.49489065011342365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,128,1,fp8,fp8,0,0.4598720073699951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,0,1,fp8,fp8,0,1.1786346435546875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,fp8,0,0.7214826742808024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,0,1,fp8,fp8,0,0.6651680072148641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,float16,0,0.49557868639628094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,fp8,0,0.4267146587371826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,128,1,fp8,fp8,0,0.3901493151982625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,fp8,0,0.6504213412602743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,0,1,fp8,fp8,0,0.5930080016454061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,float16,0,0.43824533621470135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,float16,0,0.6510399977366129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,fp8,0,0.4304639895757039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,128,1,fp8,fp8,0,0.4020853439966838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,float16,0,0.6461546818415324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,0,1,fp8,fp8,0,0.597050666809082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,float16,0,0.43088531494140625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,float16,0,0.6557333469390869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,128,1,fp8,fp8,0,0.4111786683400472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,float16,0,0.42140265305836994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,fp8,0,0.6526773373285929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,0,1,fp8,fp8,0,0.601583997408549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,float16,0,0.43926934401194256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,fp8,0,0.435263991355896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,128,1,fp8,fp8,0,0.4068160057067871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,fp8,0,0.6591253280639648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,0,1,fp8,fp8,0,0.607861320177714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,float16,0,0.2571306626001994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,float16,0,0.6644373337427775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,float16,0,0.3763573169708252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,fp8,0,0.2611946662267049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,128,1,fp8,fp8,0,0.24724799394607544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,fp8,0,0.37958399454752606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,fp8,0,0.6680586338043213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,float16,0,0.22013866901397705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,float16,0,0.3374933401743571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,128,1,fp8,fp8,0,0.20986133813858032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,fp8,0,0.44483200709025067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,0,1,fp8,fp8,0,0.35290666421254474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,0,1,fp8,fp8,0,0.3164586623509725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,float16,0,0.2225386699040731
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,fp8,0,0.3394560019175212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,float16,0,0.3381439844767253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,fp8,0,0.22504534324010214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,fp8,0,0.34189867973327637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,0,1,fp8,fp8,0,0.3185546596844991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,fp8,0,0.22224533557891846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,float16,0,0.22633600234985352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,float16,0,0.3429866631825765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,fp8,0,0.22830400864283243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,128,1,fp8,fp8,0,0.2153653303782145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,fp8,0,0.34487466017405194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,128,1,fp8,fp8,0,0.2120479941368103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,float16,0,0.23249600330988565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,float16,0,0.34887464841206867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,fp8,0,0.23611199855804443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,128,1,fp8,fp8,0,0.21947733561197916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,fp8,0,0.3521866798400879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,0,1,fp8,fp8,0,0.3261973261833191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,float16,0,0.20644267400105795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,fp8,0,0.14342400431632996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,128,1,fp8,fp8,0,0.13878933588663736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,fp8,0,0.21093332767486572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,0,1,fp8,fp8,0,0.19768534104029337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,float16,0,0.11987200379371643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,float16,0,0.18528532981872559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,128,1,fp8,fp8,0,0.1116373340288798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,fp8,0,0.18557866414388022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,0,1,fp8,fp8,0,0.17177067200342813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,float16,0,0.14097066720326742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,float16,0,0.1856480042139689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,fp8,0,0.12170666456222534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,128,1,fp8,fp8,0,0.11355732878049214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,fp8,0,0.12166399757067363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,0,1,fp8,fp8,0,0.3214400013287862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,float16,0,0.12164800365765889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,float16,0,0.18578133980433145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,float16,0,0.11956266562143962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,128,1,fp8,fp8,0,0.11545600493748982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,fp8,0,0.18734399477640787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,0,1,fp8,fp8,0,0.17493333419164023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,fp8,0,0.18556267023086548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,0,1,fp8,fp8,0,0.17316800355911255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,fp8,0,0.12585600217183432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,128,1,fp8,fp8,0,0.12210133671760559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,fp8,0,0.12154666582743327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,0,1,fp8,fp8,0,0.1809813380241394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,float16,0,0.08117333551247914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,float16,0,0.11966400345166524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,fp8,0,0.08413867155710857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,128,1,fp8,fp8,0,0.08299200236797333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,fp8,0,0.1202346682548523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,0,1,fp8,fp8,0,0.11771733562151591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,float16,0,0.07803733150164287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,float16,0,0.11431466539700826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,fp8,0,0.07809066772460938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,128,1,fp8,fp8,0,0.07141333321730296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,fp8,0,0.11388267079989116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,float16,0,0.12292266885439555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,0,1,fp8,fp8,0,0.10529067118962605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,float16,0,0.07702399790287018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,float16,0,0.11362666885058086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,fp8,0,0.07753600180149078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,128,1,fp8,fp8,0,0.07271466652552287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,fp8,0,0.11340799927711487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,0,1,fp8,fp8,0,0.1074026624361674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,float16,0,0.07668800155321757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,float16,0,0.11382933457692464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,fp8,0,0.07826666533946991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,128,1,fp8,fp8,0,0.07287999987602234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,fp8,0,0.11428266763687134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,0,1,fp8,fp8,0,0.10646399855613708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,float16,0,0.07818666597207387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,float16,0,0.11360533038775127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,fp8,0,0.07863999903202057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,128,1,fp8,fp8,0,0.07458666463692983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,fp8,0,0.11408533652623494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,float16,0,0.189082662264506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,float16,0,0.06201066573460897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,float16,0,0.08739200234413147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,fp8,0,0.06263466676076253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,128,1,fp8,fp8,0,0.05961066484451294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,fp8,0,0.0888853371143341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,0,1,fp8,fp8,0,0.08368000388145447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,float16,0,0.062165334820747375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,float16,0,0.08877333005269368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,fp8,0,0.06244266529877981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,128,1,fp8,fp8,0,0.06029866635799408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,fp8,0,0.08910399675369263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,0,1,fp8,fp8,0,0.0842186709245046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,float16,0,0.061941335598627724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,float16,0,0.0881173312664032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,fp8,0,0.06238399942715963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,128,1,fp8,fp8,0,0.05879466732343038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,fp8,0,0.08906666437784831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,0,1,fp8,fp8,0,0.08451199531555176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,float16,0,0.06275199850400288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,float16,0,0.08872532844543457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,fp8,0,0.0622026671965917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,128,1,fp8,fp8,0,0.06000000238418579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,fp8,0,0.0886346697807312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,0,1,fp8,fp8,0,0.08298133313655853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,float16,0,0.062090665102005005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,float16,0,0.08935999870300293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,fp8,0,0.06247999767462412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,0,1,fp8,fp8,0,0.10773866375287373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,128,1,fp8,fp8,0,0.060085331400235496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,fp8,0,0.08881066242853801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,0,1,fp8,fp8,0,0.08306666711966197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,fp8,0,0.19129600127538046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,float16,0,1.2387573719024658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,float16,0,1.661146640777588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,fp8,0,1.2407253583272297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,128,1,fp8,fp8,0,1.1227306524912517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,0,1,fp8,fp8,0,1.503167947133382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,float16,0,1.2664426962534587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,float16,0,1.6877013842264812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,fp8,0,1.268778642018636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,128,1,fp8,fp8,0,1.1390026410420735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,fp8,0,1.6892159779866536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,fp8,0,1.6662880579630535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,float16,0,1.2825813293457031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,fp8,0,1.2777547041575115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,float16,0,1.7039252916971843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,0,1,fp8,fp8,0,1.5213653246561687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,fp8,0,1.700170675913493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,float16,0,1.3024533589680989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,float16,0,1.7279839515686035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,fp8,0,1.3029013474782307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,0,1,fp8,fp8,0,1.5331093470255535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,128,1,fp8,fp8,0,1.1804640293121338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,float16,0,0.720730702082316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,128,1,fp8,fp8,0,1.1502079963684082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,fp8,0,1.7286453247070312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,0,1,fp8,fp8,0,1.5608906745910645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,float16,0,0.941210667292277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,128,1,fp8,fp8,0,0.6769813696543375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,fp8,0,0.9487626552581787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,0,1,fp8,fp8,0,0.8736426830291748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,float16,0,0.8378720283508301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,fp8,0,0.627402663230896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,128,1,fp8,fp8,0,0.5687626600265503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,fp8,0,0.8424853483835856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,fp8,0,0.7268693447113037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,0,1,fp8,fp8,0,0.7751893202463785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,float16,0,0.646021326382955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,float16,0,0.8429173628489176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,fp8,0,0.6323573191960653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,128,1,fp8,fp8,0,0.5753973325093588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,fp8,0,0.8486719926198324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,float16,0,0.6337226629257202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,float16,0,0.8496692975362142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,fp8,0,0.6398506561915079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,128,1,fp8,fp8,0,0.6117013295491537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,float16,0,0.6221386591593424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,fp8,0,0.8564533392588297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,0,1,fp8,fp8,0,0.7754666805267334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,0,1,fp8,fp8,0,0.7705439726511637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,float16,0,0.8633226553599039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,fp8,0,0.6700479984283447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,128,1,fp8,fp8,0,0.5952426592508951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,fp8,0,0.8692746957143148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,float16,0,0.370197335879008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,0,1,fp8,fp8,0,0.788271983464559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,float16,0,0.4843786557515462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,fp8,0,0.3750186761220296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,128,1,fp8,fp8,0,0.3601919809977214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,fp8,0,0.4891093174616496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,float16,0,0.3197279969851176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,float16,0,0.43114133675893146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,128,1,fp8,fp8,0,0.2981920043627421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,fp8,0,0.4338879982630412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,0,1,fp8,fp8,0,0.39981865882873535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,float16,0,0.3219733238220215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,float16,0,0.6462986469268799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,float16,0,0.4336693286895752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,128,1,fp8,fp8,0,0.30221333106358844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,fp8,0,0.44518399238586426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,0,1,fp8,fp8,0,0.40301334857940674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,0,1,fp8,fp8,0,0.45300265153249103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,fp8,0,0.3311573266983032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,fp8,0,0.33109867572784424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,128,1,fp8,fp8,0,0.30590399106343585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,fp8,0,0.4413493474324544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,float16,0,0.3287946581840515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,float16,0,0.3346240123112996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,fp8,0,0.32449599107106525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,float16,0,0.44602668285369873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,128,1,fp8,fp8,0,0.3121119936307271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,fp8,0,0.4498240152994792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,0,1,fp8,fp8,0,0.40434134006500244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,float16,0,0.19610132773717245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,float16,0,0.43775467077891034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,float16,0,0.2571946581204732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,fp8,0,0.33821332454681396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,128,1,fp8,fp8,0,0.18972265720367432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,fp8,0,0.26198933521906537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,0,1,fp8,fp8,0,0.244704008102417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,float16,0,0.16689600547154745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,float16,0,0.22604266802469888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,0,1,fp8,fp8,0,0.4126986662546794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,128,1,fp8,fp8,0,0.16074666380882263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,fp8,0,0.2271626591682434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,0,1,fp8,fp8,0,0.2140000065167745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,float16,0,0.1669386625289917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,float16,0,0.2264853318532308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,fp8,0,0.1696959932645162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,fp8,0,0.16850666205088297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,128,1,fp8,fp8,0,0.1628320018450419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,fp8,0,0.22982933123906454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,0,1,fp8,fp8,0,0.21643733978271484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,fp8,0,0.20159467061360678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,float16,0,0.22985066970189413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,fp8,0,0.17246399323145548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,128,1,fp8,fp8,0,0.16567466656366983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,fp8,0,0.2323466738065084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,0,1,fp8,fp8,0,0.21839465697606406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,float16,0,0.17562667528788248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,float16,0,0.23644800980885824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,fp8,0,0.1788426637649536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,fp8,0,0.23879466454188028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,0,1,fp8,fp8,0,0.22403732935587564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,float16,0,0.10782399773597717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,float16,0,0.14217066764831543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,fp8,0,0.11156266927719116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,float16,0,0.1702079971631368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,fp8,0,0.1441920002301534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,0,1,fp8,fp8,0,0.1395840048789978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,float16,0,0.09418666362762451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,float16,0,0.1281760036945343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,fp8,0,0.09514133135477702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,128,1,fp8,fp8,0,0.08906132976214091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,fp8,0,0.129120002190272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,0,1,fp8,fp8,0,0.11820800105730693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,float16,0,0.09496532877286275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,float16,0,0.12803733348846436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,fp8,0,0.09691199660301208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,128,1,fp8,fp8,0,0.08922132849693298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,fp8,0,0.1279306709766388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,0,1,fp8,fp8,0,0.12011733651161194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,float16,0,0.09495466947555542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,128,1,fp8,fp8,0,0.10752000411351521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,fp8,0,0.09641599655151367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,128,1,fp8,fp8,0,0.08984532952308655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,fp8,0,0.12963733077049255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,0,1,fp8,fp8,0,0.12040000160535176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,float16,0,0.09718933701515198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,float16,0,0.1293333371480306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,fp8,0,0.09948266545931499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,128,1,fp8,fp8,0,0.09141866366068523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,128,1,fp8,fp8,0,0.16897066434224448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,0,1,fp8,fp8,0,0.1218773325284322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,float16,0,0.06374399860699971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,float16,0,0.08525333801905315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,fp8,0,0.06637333333492279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,128,1,fp8,fp8,0,0.06465599934260051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,fp8,0,0.0870293378829956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,0,1,fp8,fp8,0,0.08455999692281087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,float16,0,0.062208001812299095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,float16,0,0.08322666585445404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,fp8,0,0.06235733131567637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,128,1,fp8,fp8,0,0.05817066629727682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,fp8,0,0.0830506682395935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,0,1,fp8,fp8,0,0.07668800155321757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,float16,0,0.061333333452542625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,float16,0,0.08229866623878479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,fp8,0,0.06043733159701029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,fp8,0,0.13064000010490417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,fp8,0,0.08273066580295563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,0,1,fp8,fp8,0,0.07649066547552745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,float16,0,0.06226666768391927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,float16,0,0.08282133440176646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,fp8,0,0.06225066880385081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,128,1,fp8,fp8,0,0.05815466741720835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,fp8,0,0.08265600105126698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,0,1,fp8,fp8,0,0.07761066655317943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,float16,0,0.06025066475073496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,float16,0,0.08275733391443889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,fp8,0,0.06252266466617584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,128,1,fp8,fp8,0,0.05867200096448263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,fp8,0,0.08275733391443889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,0,1,fp8,fp8,0,0.07830933233102162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,float16,0,0.05416533350944519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,float16,0,0.06590400139490764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,fp8,0,0.05397866666316986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,128,1,fp8,fp8,0,0.05014933149019877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,fp8,0,0.06651733318964641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,0,1,fp8,fp8,0,0.06227200229962667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,float16,0,0.05380799869696299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,float16,0,0.06646400193373363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,fp8,0,0.05195199946562449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,128,1,fp8,fp8,0,0.05167999863624573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,fp8,0,0.0644160012404124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,0,1,fp8,fp8,0,0.062224000692367554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,float16,0,0.05206400156021118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,float16,0,0.06540800134340923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,fp8,0,0.052101333936055504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,128,1,fp8,fp8,0,0.04974933465321859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,fp8,0,0.06612266600131989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,0,1,fp8,fp8,0,0.062458669145902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,128,1,fp8,fp8,0,0.05811200042565664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,float16,0,0.06599466502666473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,fp8,0,0.05388266841570536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,128,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,float16,0,0.12803733348846436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,fp8,0,0.06634666522343953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,0,1,fp8,fp8,0,0.06241600215435028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,float16,0,0.05236266553401947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,float16,0,0.06576000154018402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,fp8,0,0.05435200035572052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,128,1,fp8,fp8,0,0.04997866849104563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,fp8,0,0.06648000081380208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,0,1,fp8,fp8,0,0.0629120022058487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,float16,0,0.05169066786766052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,float16,0,1.4974667231241863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,float16,0,1.7734187444051106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,128,1,fp8,fp8,0,1.4029653867085774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,fp8,0,1.7609492937723796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,0,1,fp8,fp8,0,1.6527360280354817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,fp8,0,1.4844373067220051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,float16,0,1.788256009419759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,128,1,fp8,fp8,0,1.3830933570861816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,fp8,0,1.7814240455627441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,0,1,fp8,fp8,0,1.617461363474528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,float16,0,1.5511786142985027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,float16,0,1.8214027086893718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,float16,0,1.5097227096557617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,fp8,0,1.5466666221618652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,128,1,fp8,fp8,0,1.5913653373718262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,fp8,0,1.500362714131673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,fp8,0,1.8195573488871257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,0,1,fp8,fp8,0,1.8607360521952312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,float16,0,1.5354933738708496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,fp8,0,1.4981279373168945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,float16,0,1.803391933441162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,128,1,fp8,fp8,0,1.6072160402933757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,float16,0,0.7868159612019857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,float16,0,0.9348479906717936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,0,1,fp8,fp8,0,1.8439146677652996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,fp8,0,0.7748533089955648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,128,1,fp8,fp8,0,0.7828213373819987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,fp8,0,0.917680025100708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,0,1,fp8,fp8,0,0.916208028793335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,float16,0,0.7583626906077067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,fp8,0,1.7813386917114258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,fp8,0,0.7525226275126139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,128,1,fp8,fp8,0,0.6976906458536783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,fp8,0,0.8904106616973877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,0,1,fp8,fp8,0,0.8221866289774576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,float16,0,0.7670933405558268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,float16,0,0.9005920092264811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,128,1,fp8,fp8,0,0.6988000075022379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,float16,0,0.8976586659749349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,fp8,0,0.8988640308380127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,float16,0,0.7760852972666422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,float16,0,0.9153333504994711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,fp8,0,0.7727306683858236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,fp8,0,0.7601813475290934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,128,1,fp8,fp8,0,0.7841280301411947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,fp8,0,0.9126453399658203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,0,1,fp8,fp8,0,0.8223733107248942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,float16,0,0.9318880240122477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,128,1,fp8,fp8,0,0.7903733253479004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,fp8,0,0.8947359720865885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,fp8,0,0.7542826334635416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,0,1,fp8,fp8,0,0.9163146813710531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,float16,0,0.4031146764755249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,float16,0,0.479039986928304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,fp8,0,0.3948586781819661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,128,1,fp8,fp8,0,0.3999413251876831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,fp8,0,0.4705173174540202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,0,1,fp8,fp8,0,0.46558932463328045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,float16,0,0.40723200639088947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,float16,0,0.4593386650085449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,fp8,0,0.40400532881418866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,128,1,fp8,fp8,0,0.3760853211085002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,fp8,0,0.45795734723409015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,0,1,fp8,fp8,0,0.4196586608886719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,float16,0,0.39379199345906574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,float16,0,0.46288001537323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,fp8,0,0.39128533999125165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,128,1,fp8,fp8,0,0.3593386809031169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,fp8,0,0.46243735154469806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,0,1,fp8,fp8,0,0.4227786858876546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,float16,0,0.3997439940770467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,float16,0,0.4702986478805542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,fp8,0,0.3983039855957031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,128,1,fp8,fp8,0,0.41471465428670246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,0,1,fp8,fp8,0,0.9189919630686442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,0,1,fp8,fp8,0,0.451200008392334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,float16,0,0.39000535011291504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,float16,0,0.46169066429138184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,fp8,0,0.3863999843597412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,128,1,fp8,fp8,0,0.3884693384170532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,fp8,0,0.4583040078481038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,0,1,fp8,fp8,0,0.45292266209920246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,float16,0,0.2507680058479309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,fp8,0,0.208570659160614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,float16,0,0.21229867140452066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,128,1,fp8,fp8,0,0.21189866463343301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,fp8,0,0.24945600827534994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,0,1,fp8,fp8,0,0.24467732508977255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,float16,0,0.20168532927831015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,float16,0,0.24058133363723755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,fp8,0,0.2016106645266215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,float16,0,0.7623039881388346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,fp8,0,0.23750933011372885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,0,1,fp8,fp8,0,0.22037333250045776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,float16,0,0.2444960077603658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,fp8,0,0.2038080096244812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,128,1,fp8,fp8,0,0.18738667170206705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,128,1,fp8,fp8,0,0.18835200866063437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,0,1,fp8,fp8,0,0.22212799390157065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,float16,0,0.20773333311080933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,float16,0,0.2051360011100769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,float16,0,0.2446026603380839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,fp8,0,0.20779732863108316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,128,1,fp8,fp8,0,0.2003893256187439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,fp8,0,0.24558399120966592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,0,1,fp8,fp8,0,0.2328959902127584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,float16,0,0.20570133129755655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,float16,0,0.24220265944798788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,fp8,0,0.20406399170557657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,128,1,fp8,fp8,0,0.19620800018310547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,fp8,0,0.24223999182383218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,0,1,fp8,fp8,0,0.2302293380101522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,float16,0,0.11689600348472595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,float16,0,0.138154665629069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,fp8,0,0.11558399597803752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,128,1,fp8,fp8,0,0.11705600221951802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,fp8,0,0.13778666655222574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,0,1,fp8,fp8,0,0.13673599561055502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,float16,0,0.1086293359597524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,float16,0,0.12994133432706198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,fp8,0,0.10851732889811198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,128,1,fp8,fp8,0,0.10046399633089702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,fp8,0,0.13103466232617697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,0,1,fp8,fp8,0,0.1199679970741272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,float16,0,0.10958400368690491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,float16,0,0.13193600376447043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,fp8,0,0.10956266522407532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,128,1,fp8,fp8,0,0.1030453344186147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,fp8,0,0.13182399670283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,0,1,fp8,fp8,0,0.12169599533081055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,float16,0,0.11178132891654968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,float16,0,0.13397866487503052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,fp8,0,0.11141866445541382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,128,1,fp8,fp8,0,0.10692266623179118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,fp8,0,0.13505066434542337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,0,1,fp8,fp8,0,0.12566933035850525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,fp8,0,0.4902079900105794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,float16,0,0.13133333126703897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,fp8,0,0.10940266648928325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,128,1,fp8,fp8,0,0.1081119974454244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,fp8,0,0.13194666306177774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,0,1,fp8,fp8,0,0.1281066636244456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,float16,0,0.0643146683772405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,float16,0,0.0767146646976471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,fp8,0,0.06472533444563548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,128,1,fp8,fp8,0,0.06642666459083557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,fp8,0,0.07669333120187123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,0,1,fp8,fp8,0,0.07705066601435344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,float16,0,0.06412800153096516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,float16,0,0.0743999977906545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,fp8,0,0.06281599899133046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,128,1,fp8,fp8,0,0.06019733349482218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,fp8,0,0.07504533231258392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,0,1,fp8,fp8,0,0.070592001080513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,float16,0,0.06249066690603892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,float16,0,0.07454399764537811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,128,1,fp8,fp8,0,0.05862933397293091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,fp8,0,0.07463466624418895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,0,1,fp8,fp8,0,0.07050133248170216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,float16,0,0.06433600187301636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,float16,0,0.0769760012626648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,fp8,0,0.2407360076904297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,fp8,0,0.06271466612815857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,fp8,0,0.07667733232180278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,0,1,fp8,fp8,0,0.07366399963696797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,float16,0,0.0642986645301183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,float16,0,0.07645333309968312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,float16,0,0.10970667004585266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,128,1,fp8,fp8,0,0.061706667145093284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,128,1,fp8,fp8,0,0.06087466577688853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,0,1,fp8,fp8,0,0.07236800094445546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,float16,0,0.04159999887148539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,float16,0,0.051039998730023704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,fp8,0,0.04159999887148539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,128,1,fp8,fp8,0,0.04170133173465729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,fp8,0,0.05017066498597463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,fp8,0,0.06233599781990051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,float16,0,0.04195733368396759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,fp8,0,0.07474133372306824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,float16,0,0.05035200218359629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,128,1,fp8,fp8,0,0.03942399968703588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,fp8,0,0.050288001696268715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,0,1,fp8,fp8,0,0.04794133206208547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,float16,0,0.04161600023508072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,float16,0,0.05030933519204458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,fp8,0,0.0414986660083135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,128,1,fp8,fp8,0,0.03878933439652125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,fp8,0,0.06444799900054932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,0,1,fp8,fp8,0,0.04818666477998098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,float16,0,0.04005333284536997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,float16,0,0.05006400247414907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,fp8,0,0.03995733211437861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,128,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,fp8,0,0.050581331054369606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,0,1,fp8,fp8,0,0.049216002225875854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,float16,0,0.03966933240493139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,float16,0,0.0499893327554067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,fp8,0,0.04186133543650309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,128,1,fp8,fp8,0,0.04144000013669332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,fp8,0,0.05012266834576925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,0,1,fp8,fp8,0,0.04982399940490723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,float16,0,0.03320533285538355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,128,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,0,1,fp8,fp8,0,0.04806933303674062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,0,1,fp8,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,float16,0,0.03324799984693527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,128,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,0,1,fp8,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,fp8,0,0.050010666251182556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,128,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,fp8,0,0.03199466566244761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,0,1,fp8,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,float16,0,0.03363733241955439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,128,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,0,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,float16,0,0.025807999074459076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,float16,0,0.03334933271010717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,128,1,fp8,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,0,1,fp8,fp8,0,0.03348266581694285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,128,1,float16,float16,0,1.4510186513264973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,0,1,float16,float16,0,1.4694933891296387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,float16,0,0.02794666588306427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,128,1,float16,fp8,0,1.4408532778422039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,float16,0,0.03330666571855545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,128,1,fp8,fp8,0,1.371664047241211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,0,1,float16,fp8,0,1.4632426897684734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,0,1,fp8,fp8,0,1.371664047241211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,128,1,float16,float16,0,1.4656906127929688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,0,1,float16,float16,0,1.481808026631673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,128,1,float16,fp8,0,1.4615306854248047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,128,1,fp8,fp8,0,1.3439839680989583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,0,1,float16,fp8,0,1.4767413139343262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,0,1,fp8,fp8,0,1.3737227121988933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,128,1,float16,float16,0,1.505930741628011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,0,1,float16,float16,0,1.526917298634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,128,1,float16,fp8,0,1.4898239771525066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,128,1,fp8,fp8,0,1.5711359977722168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,0,1,float16,fp8,0,1.5203626950581868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,0,1,fp8,fp8,0,1.584778626759847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,128,1,float16,float16,0,1.4965173403422039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,0,1,float16,float16,0,1.5072320302327473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,128,1,float16,fp8,0,1.4591360092163086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,128,1,fp8,fp8,0,1.5557759602864583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,0,1,float16,fp8,0,1.4917759895324707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,128,1,float16,float16,0,0.7643199761708578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,0,1,float16,float16,0,0.7902773221333822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,128,1,float16,fp8,0,0.7578453222910563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,128,1,fp8,fp8,0,0.750981330871582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,0,1,float16,fp8,0,0.7665812969207764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,0,1,fp8,fp8,0,0.7617599964141846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,128,1,float16,float16,0,0.749882698059082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,0,1,float16,float16,0,0.7638239860534668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,128,1,float16,fp8,0,0.7298933664957682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,0,1,fp8,fp8,0,1.569386641184489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,0,1,float16,fp8,0,0.7406079769134521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,0,1,fp8,fp8,0,0.6788586775461832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,128,1,float16,float16,0,0.7403093179066976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,0,1,float16,float16,0,0.7547679742177328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,128,1,float16,fp8,0,0.7360693613688151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,128,1,fp8,fp8,0,0.6728106339772543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,0,1,float16,fp8,0,0.7499519983927408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,0,1,fp8,fp8,0,0.677232027053833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,128,1,fp8,fp8,0,0.6730720202128092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,0,1,float16,float16,0,0.7650132973988851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,128,1,float16,fp8,0,0.7483839988708496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,128,1,fp8,fp8,0,0.7764800389607748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,0,1,float16,fp8,0,0.7976586818695068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,0,1,fp8,fp8,0,0.7794826825459799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,128,1,float16,float16,0,0.7374453544616699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,0,1,float16,float16,0,0.7501760323842367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,128,1,float16,fp8,0,0.7303573290506998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,128,1,float16,float16,0,0.7525013287862142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,128,1,fp8,fp8,0,0.809445301691691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,0,1,float16,fp8,0,0.7441066900889078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,0,1,fp8,fp8,0,0.7690560022989908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,0,1,float16,float16,0,0.4204426606496175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,128,1,float16,fp8,0,0.3864426612854004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,128,1,fp8,fp8,0,0.3845280011494954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,0,1,float16,fp8,0,0.3951573371887207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,0,1,fp8,fp8,0,0.3901439905166626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,128,1,float16,float16,0,0.37723731994628906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,0,1,float16,float16,0,0.3812853495279948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,128,1,float16,fp8,0,0.37390931447347003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,128,1,fp8,fp8,0,0.3438933293024699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,0,1,float16,fp8,0,0.3799999952316284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,0,1,fp8,fp8,0,0.34937600294748944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,128,1,float16,float16,0,0.37836798032124835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,0,1,float16,float16,0,0.38682134946187335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,128,1,float16,fp8,0,0.37754666805267334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,128,1,fp8,fp8,0,0.34772801399230957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,0,1,float16,fp8,0,0.3832426468531291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,0,1,fp8,fp8,0,0.35156798362731934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,128,1,float16,float16,0,0.39311468601226807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,0,1,float16,float16,0,0.39113601048787433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,128,1,float16,fp8,0,0.3850133419036865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,128,1,fp8,fp8,0,0.37637333075205487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,0,1,float16,fp8,0,0.38975465297698975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,0,1,fp8,fp8,0,0.37937601407368976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,128,1,float16,float16,0,0.3792639970779419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,0,1,float16,float16,0,0.38488535086313885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,128,1,float16,fp8,0,0.37538135051727295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,128,1,fp8,fp8,0,0.37701865037282306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,0,1,float16,fp8,0,0.38064531485239667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,0,1,fp8,fp8,0,0.3826506535212199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,128,1,float16,float16,0,0.20571200052897134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,0,1,float16,float16,0,0.20992000897725424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,128,1,float16,fp8,0,0.20320000251134238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,128,1,fp8,fp8,0,0.20221867163976034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,0,1,float16,fp8,0,0.20778665939966837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,0,1,fp8,fp8,0,0.20707199970881143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,128,1,float16,float16,0,0.19662400086720785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,0,1,float16,float16,0,0.199946661790212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,128,1,float16,fp8,0,0.1955146590868632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,128,1,fp8,fp8,0,0.18147200345993042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,0,1,float16,fp8,0,0.19828800360361734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,0,1,fp8,fp8,0,0.18262932697931925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,128,1,float16,float16,0,0.20018132527669272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,0,1,float16,float16,0,0.20153067509333292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,128,1,float16,fp8,0,0.19852266709009805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,128,1,fp8,fp8,0,0.18319465716679892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,0,1,float16,fp8,0,0.19955732425053915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,0,1,fp8,fp8,0,0.19516799847284952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,128,1,float16,float16,0,0.2035413384437561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,0,1,float16,float16,0,0.20380266507466635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,128,1,float16,fp8,0,0.20222399632136026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,128,1,fp8,fp8,0,0.1945120096206665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,0,1,float16,fp8,0,0.2034346659978231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,0,1,fp8,fp8,0,0.19645333290100098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,128,1,float16,float16,0,0.19951466719309488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,0,1,float16,float16,0,0.20116267601648966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,128,1,float16,float16,0,0.38702932993570965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,128,1,fp8,fp8,0,0.19511467218399048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,0,1,float16,fp8,0,0.20021865765253702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,0,1,fp8,fp8,0,0.19631999731063843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,0,1,float16,float16,0,0.11768000324567159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,128,1,float16,fp8,0,0.11177600423494975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,128,1,fp8,fp8,0,0.11337066690127055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,0,1,float16,fp8,0,0.11519466837247212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,0,1,fp8,fp8,0,0.11558399597803752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,128,1,float16,float16,0,0.10665067036946614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,0,1,float16,float16,0,0.10739200313886006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,128,1,float16,fp8,0,0.1053013304869334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,128,1,fp8,fp8,0,0.09730666875839233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,0,1,float16,fp8,0,0.10732266306877136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,0,1,fp8,fp8,0,0.09927999973297119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,128,1,float16,float16,0,0.10731200377146403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,0,1,float16,float16,0,0.10941333572069804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,128,1,float16,fp8,0,0.1074720025062561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,128,1,fp8,fp8,0,0.09937066833178203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,0,1,float16,fp8,0,0.10918933153152466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,128,1,float16,float16,0,0.11432000001271565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,128,1,float16,float16,0,0.10935466488202412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,0,1,float16,float16,0,0.11101333300272624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,128,1,float16,fp8,0,0.10829333464304607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,128,1,fp8,fp8,0,0.10520000259081523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,0,1,float16,fp8,0,0.10963732997576396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,0,1,fp8,fp8,0,0.10592533151308696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,128,1,float16,float16,0,0.1090133289496104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,0,1,float16,float16,0,0.10949866970380147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,128,1,float16,fp8,0,0.10748799641927083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,0,1,float16,fp8,0,0.10895466804504395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,0,1,fp8,fp8,0,0.10559999942779541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,128,1,float16,float16,0,0.06196799874305725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,0,1,fp8,fp8,0,0.09930133819580078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,128,1,float16,fp8,0,0.06232533355553945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,128,1,fp8,fp8,0,0.06413333117961884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,0,1,float16,fp8,0,0.06275733311971028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,0,1,fp8,fp8,0,0.06613866488138835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,128,1,float16,float16,0,0.061861331264177956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,0,1,float16,float16,0,0.06225066880385081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,128,1,float16,fp8,0,0.06018666426340739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,128,1,float16,fp8,0,0.1967680056889852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,128,1,fp8,fp8,0,0.05719999969005585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,0,1,float16,float16,0,0.06237333516279856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,0,1,fp8,fp8,0,0.057962665955225624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,128,1,float16,float16,0,0.06243200103441874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,0,1,float16,float16,0,0.062181333700815834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,128,1,float16,fp8,0,0.06243200103441874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,128,1,fp8,fp8,0,0.10754666725794475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,0,1,float16,fp8,0,0.06258133550484975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,0,1,fp8,fp8,0,0.0589279979467392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,128,1,float16,float16,0,0.0625600020090739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,0,1,float16,float16,0,0.06334400177001953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,128,1,float16,fp8,0,0.06249066690603892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,128,1,fp8,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,0,1,float16,fp8,0,0.06229333579540253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,128,1,fp8,fp8,0,0.058389330903689064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,0,1,fp8,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,128,1,float16,float16,0,0.060640002290407814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,0,1,float16,float16,0,0.06293333570162456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,128,1,float16,fp8,0,0.060346667965253196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,128,1,fp8,fp8,0,0.060421332716941833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,0,1,float16,fp8,0,0.06165866553783417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,128,1,float16,float16,0,0.039994666973749794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,0,1,float16,float16,0,0.04200000067551931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,0,1,float16,fp8,0,0.06089066465695699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,128,1,fp8,fp8,0,0.041477332512537636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,0,1,float16,fp8,0,0.04155733436346054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,0,1,fp8,fp8,0,0.04148799926042557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,128,1,float16,float16,0,0.04020266731580099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,0,1,float16,float16,0,0.04115733255942663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,128,1,float16,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,128,1,fp8,fp8,0,0.039520000418027244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,0,1,float16,fp8,0,0.04182933270931244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,0,1,fp8,fp8,0,0.03948266555865606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,128,1,float16,float16,0,0.04116799930731455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,0,1,float16,float16,0,0.042122667034467064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,128,1,float16,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,128,1,fp8,fp8,0,0.039690665900707245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,0,1,float16,fp8,0,0.04154133299986521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,0,1,fp8,fp8,0,0.03982933362325033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,128,1,float16,float16,0,0.04153066625197729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,128,1,float16,fp8,0,0.041749333341916404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,128,1,fp8,fp8,0,0.03955200066169103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,0,1,float16,fp8,0,0.042133331298828125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,0,1,fp8,fp8,0,0.03990933299064636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,128,1,float16,float16,0,0.039701332648595176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,0,1,float16,float16,0,0.041706666350364685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,128,1,float16,fp8,0,0.03987200061480204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,128,1,fp8,fp8,0,0.04085866610209147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,0,1,float16,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,0,1,fp8,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,128,1,float16,float16,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,0,1,float16,float16,0,0.027514666318893433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,128,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,128,1,fp8,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,0,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,0,1,fp8,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,128,1,float16,float16,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,0,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,128,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,128,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,0,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,128,1,float16,float16,0,0.027402666707833607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,0,1,float16,float16,0,0.027221334477265675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,128,1,float16,fp8,0,0.027642667293548584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,128,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,0,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,128,1,float16,float16,0,0.026922665536403656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,0,1,float16,float16,0,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,128,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,128,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,0,1,float16,fp8,0,0.02848000079393387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,0,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,128,1,float16,float16,0,0.02603200078010559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,0,1,float16,float16,0,0.027690666417280834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,128,1,float16,fp8,0,0.026752000053723652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,128,1,fp8,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,0,1,fp8,fp8,0,0.061290666460990906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,0,1,fp8,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,128,1,float16,fp8,0,0.0417546679576238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,128,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,128,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,0,1,float16,fp8,0,0.024661332368850708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,0,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,128,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,128,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,0,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,0,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,128,1,float16,float16,0,0.023007998863856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,128,1,float16,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,128,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,0,1,float16,fp8,0,0.023567999402681988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,128,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,128,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,0,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,128,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,128,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,0,1,float16,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,0,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,128,1,float16,float16,0,0.023792001108328503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,0,1,float16,float16,0,0.02384000023206075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,128,1,float16,fp8,0,0.02378133436044057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,128,1,fp8,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,0,1,float16,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,0,1,fp8,fp8,0,0.02254933367172877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,128,1,float16,float16,0,0.6854293346405029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,0,1,float16,float16,0,0.6751519838968912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,128,1,float16,fp8,0,0.6789600054423014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,128,1,fp8,fp8,0,0.6255733172098795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,0,1,float16,fp8,0,0.6684160232543945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,0,1,fp8,fp8,0,0.6115520000457764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,128,1,float16,float16,0,0.6942240397135416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,0,1,float16,float16,0,0.679423967997233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,128,1,float16,fp8,0,0.6873599688212076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,128,1,fp8,fp8,0,0.63045334815979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,0,1,float16,fp8,0,0.6750133037567139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,0,1,fp8,fp8,0,0.6116960048675537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,128,1,float16,float16,0,0.706175963083903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,0,1,float16,float16,0,0.6915199756622314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,128,1,float16,fp8,0,0.6991199652353922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,128,1,fp8,fp8,0,0.715727965037028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,0,1,float16,fp8,0,0.6862453619639078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,0,1,fp8,fp8,0,0.6958613395690918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,128,1,float16,float16,0,0.6887093385060629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,0,1,float16,float16,0,0.6781280040740967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,128,1,float16,fp8,0,0.6821440060933431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,128,1,fp8,fp8,0,0.7198452949523926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,0,1,float16,fp8,0,0.6695466836293539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,128,1,float16,float16,0,0.36508798599243164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,0,1,fp8,fp8,0,0.7060906887054443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,128,1,float16,fp8,0,0.35914134979248047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,128,1,fp8,fp8,0,0.3612906535466512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,0,1,float16,fp8,0,0.3548746506373088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,0,1,fp8,fp8,0,0.35552000999450684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,128,1,float16,float16,0,0.35077865918477374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,0,1,float16,float16,0,0.34492266178131104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,128,1,float16,fp8,0,0.3477120002110799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,128,1,fp8,fp8,0,0.3197919925053914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,0,1,float16,float16,0,0.36087465286254883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,0,1,fp8,fp8,0,0.31244800488154095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,128,1,float16,float16,0,0.353551983833313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,0,1,float16,float16,0,0.3479626576105754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,128,1,float16,fp8,0,0.3511679967244466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,128,1,fp8,fp8,0,0.3227039972941081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,0,1,float16,fp8,0,0.3448479970296224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,0,1,fp8,fp8,0,0.31539199749628705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,128,1,float16,float16,0,0.3596746524175008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,0,1,float16,fp8,0,0.34280534585316974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,128,1,float16,fp8,0,0.3579519987106323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,128,1,fp8,fp8,0,0.34614400068918866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,0,1,float16,fp8,0,0.35186131795247394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,0,1,fp8,fp8,0,0.3394720156987508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,128,1,float16,float16,0,0.35150400797526044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,128,1,float16,fp8,0,0.34806398550669354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,0,1,float16,float16,0,0.35391998291015625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,128,1,fp8,fp8,0,0.3449866771697998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,0,1,float16,fp8,0,0.34306132793426514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,0,1,fp8,fp8,0,0.3362026611963908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,128,1,float16,float16,0,0.19363200664520264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,0,1,float16,float16,0,0.18995734055836996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,128,1,float16,fp8,0,0.19157866636912027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,128,1,fp8,fp8,0,0.19235199689865112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,0,1,float16,fp8,0,0.18862932920455933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,0,1,fp8,fp8,0,0.18702399730682373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,128,1,float16,float16,0,0.18294399976730347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,0,1,float16,float16,0,0.17904533942540488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,128,1,float16,fp8,0,0.1816800038019816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,128,1,fp8,fp8,0,0.16972267627716064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,0,1,float16,fp8,0,0.17940799395243326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,0,1,fp8,fp8,0,0.16556266943613687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,128,1,float16,float16,0,0.1862186590830485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,0,1,float16,float16,0,0.1828533411026001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,128,1,float16,fp8,0,0.18505066633224487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,128,1,fp8,fp8,0,0.17125866810480753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,0,1,float16,fp8,0,0.18101332585016885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,0,1,fp8,fp8,0,0.16664533813794455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,128,1,float16,float16,0,0.18963199853897095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,0,1,float16,float16,0,0.18548266092936197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,128,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,128,1,float16,fp8,0,0.189520001411438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,128,1,fp8,fp8,0,0.18197866280873617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,0,1,float16,fp8,0,0.1856266657511393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,128,1,float16,float16,0,0.18581332763036093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,0,1,float16,float16,0,0.18300267060597739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,128,1,float16,fp8,0,0.18339200814565024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,128,1,fp8,fp8,0,0.18239466349283853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,0,1,float16,fp8,0,0.18110400438308716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,0,1,fp8,fp8,0,0.1783413290977478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,128,1,float16,float16,0,0.10566400488217671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,0,1,float16,float16,0,0.10439466436704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,128,1,float16,fp8,0,0.10384533802668254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,128,1,fp8,fp8,0,0.10553066929181416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,0,1,float16,fp8,0,0.10368000467618306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,0,1,fp8,fp8,0,0.10497066378593445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,0,1,fp8,fp8,0,0.1760586698849996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,128,1,float16,float16,0,0.09798399607340495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,0,1,float16,float16,0,0.09698133667310078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,128,1,float16,fp8,0,0.09777599573135376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,128,1,fp8,fp8,0,0.09097599983215332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,0,1,float16,fp8,0,0.09512000282605489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,0,1,fp8,fp8,0,0.08913600444793701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,128,1,float16,float16,0,0.09869866569836934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,0,1,float16,float16,0,0.34727998574574787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,128,1,float16,fp8,0,0.09729599952697754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,128,1,fp8,fp8,0,0.09315733114878337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,0,1,float16,fp8,0,0.09510933359464009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,0,1,fp8,fp8,0,0.090938667456309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,128,1,float16,float16,0,0.10134933392206828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,0,1,float16,float16,0,0.09937600294748943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,128,1,float16,fp8,0,0.10091200470924377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,128,1,fp8,fp8,0,0.09714133540789287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,0,1,float16,fp8,0,0.09726400176684062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,0,1,fp8,fp8,0,0.0955573320388794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,128,1,float16,float16,0,0.09831999739011128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,0,1,float16,float16,0,0.09739733735720317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,128,1,float16,fp8,0,0.09913600484530131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,128,1,fp8,fp8,0,0.09763200084368388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,0,1,float16,float16,0,0.09706667065620422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,0,1,fp8,fp8,0,0.09678399562835693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,128,1,float16,float16,0,0.05961066484451294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,0,1,float16,float16,0,0.05789333085219065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,128,1,float16,fp8,0,0.05906666815280914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,128,1,fp8,fp8,0,0.062234664956728615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,0,1,float16,fp8,0,0.057087997595469155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,128,1,float16,float16,0,0.058320000767707825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,0,1,float16,float16,0,0.0562720000743866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,128,1,float16,fp8,0,0.05835733314355215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,128,1,fp8,fp8,0,0.05378133555253347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,0,1,float16,fp8,0,0.0565280020236969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,0,1,fp8,fp8,0,0.052101333936055504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,128,1,float16,float16,0,0.05830933153629303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,0,1,float16,float16,0,0.05600533386071523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,128,1,float16,fp8,0,0.058229332168896995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,128,1,fp8,fp8,0,0.05551466842492422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,0,1,float16,fp8,0,0.05611733098824819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,0,1,fp8,fp8,0,0.053930665055910744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,128,1,float16,float16,0,0.05881066620349884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,0,1,float16,float16,0,0.058143998185793556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,128,1,float16,fp8,0,0.06003733476003011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,128,1,fp8,fp8,0,0.05789866546789805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,0,1,float16,fp8,0,0.05824000140031179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,0,1,fp8,fp8,0,0.05994133154551188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,128,1,float16,float16,0,0.058176000912984215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,0,1,float16,float16,0,0.05691199998060862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,128,1,fp8,fp8,0,0.05829333265622457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,0,1,float16,fp8,0,0.05746666590372721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,0,1,float16,fp8,0,0.09723200400670369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,128,1,float16,float16,0,0.037920000652472176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,0,1,float16,float16,0,0.03741333385308584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,128,1,float16,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,0,1,fp8,fp8,0,0.05532266696294149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,0,1,float16,fp8,0,0.03716266651948293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,128,1,float16,fp8,0,0.05784533421198527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,0,1,fp8,fp8,0,0.036303999523321785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,128,1,float16,float16,0,0.037621334195137024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,0,1,float16,float16,0,0.03585066646337509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,128,1,float16,fp8,0,0.037674665451049805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,128,1,fp8,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,0,1,float16,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,0,1,fp8,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,128,1,float16,float16,0,0.03765333443880081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,0,1,float16,float16,0,0.03743999948104223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,128,1,float16,fp8,0,0.037477334340413414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,128,1,fp8,fp8,0,0.03674133370320002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,0,1,float16,fp8,0,0.036533333361148834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,0,1,fp8,fp8,0,0.03551466763019562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,128,1,float16,float16,0,0.0378506655494372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,0,1,float16,float16,0,0.037690666814645134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,128,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,128,1,fp8,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,0,1,float16,fp8,0,0.037802666425704956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,0,1,fp8,fp8,0,0.037248000502586365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,128,1,float16,float16,0,0.03793066740036011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,0,1,float16,float16,0,0.03569599986076355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,128,1,float16,fp8,0,0.037632000943024956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,128,1,fp8,fp8,0,0.03698666642109553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,0,1,float16,fp8,0,0.03775999943415324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,0,1,fp8,fp8,0,0.03579733272393545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,0,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,128,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,128,1,fp8,fp8,0,0.025642665723959606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,0,1,float16,fp8,0,0.025578667720158894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,128,1,float16,float16,0,0.025311999022960663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,128,1,float16,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,128,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,0,1,float16,fp8,0,0.023599999646345775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,0,1,fp8,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,128,1,float16,float16,0,0.025248001019159954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,0,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,128,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,128,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,0,1,fp8,fp8,0,0.05499733487764994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,0,1,fp8,fp8,0,0.023930666347344715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,0,1,float16,float16,0,0.02607999990383784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,128,1,float16,fp8,0,0.025770666698614757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,128,1,fp8,fp8,0,0.037434667348861694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,128,1,float16,float16,0,0.02533866713444392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,0,1,float16,float16,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,128,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,128,1,float16,float16,0,0.025557334224383037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,0,1,fp8,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,128,1,float16,float16,0,0.021418665846188862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,0,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,128,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,0,1,fp8,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,0,1,float16,float16,0,0.021802666286627453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,128,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,0,1,float16,fp8,0,0.019941333681344986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,0,1,fp8,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,128,1,float16,float16,0,0.02160533269246419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,0,1,float16,float16,0,0.019573333362738293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,128,1,float16,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,128,1,fp8,fp8,0,0.01971199984351794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,128,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,128,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,128,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,128,1,fp8,fp8,0,0.02603200078010559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,0,1,float16,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,128,1,float16,float16,0,0.021503999829292297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,0,1,float16,float16,0,0.02032533288002014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,128,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,0,1,float16,fp8,0,0.01960533360640208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,0,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,128,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,0,1,float16,float16,0,0.019573333362738293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,128,1,float16,fp8,0,0.020165332903464634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,0,1,fp8,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,0,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,0,1,float16,float16,0,0.01940800001223882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,128,1,float16,fp8,0,0.02022933339079221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,0,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,0,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,128,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,128,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,128,1,fp8,fp8,0,0.020282667130231857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,128,1,float16,float16,0,0.019653332730134327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,128,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,128,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,0,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,128,1,float16,float16,0,0.02013333390156428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,0,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,128,1,float16,fp8,0,0.019551999866962433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,0,1,float16,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,0,1,float16,float16,0,0.019760000209013622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,128,1,float16,float16,0,0.3690720001856486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,0,1,float16,float16,0,0.36949865023295086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,128,1,float16,fp8,0,0.3657279809315999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,0,1,float16,fp8,0,0.36581865946451825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,0,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,0,1,fp8,fp8,0,0.3418240149815877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,128,1,float16,float16,0,0.37461864948272705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,0,1,float16,float16,0,0.37401068210601807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,128,1,fp8,fp8,0,0.3316799998283386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,0,1,float16,fp8,0,0.3696639935175578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,0,1,fp8,fp8,0,0.3300586740175883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,128,1,fp8,fp8,0,0.3403466542561849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,0,1,float16,float16,0,0.3818613290786743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,128,1,float16,fp8,0,0.37700267632802326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,128,1,fp8,fp8,0,0.3587679862976074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,128,1,float16,fp8,0,0.3725920120875041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,0,1,fp8,fp8,0,0.35869868596394855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,128,1,float16,float16,0,0.37533867359161377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,0,1,float16,float16,0,0.3739466667175293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,128,1,float16,fp8,0,0.3689546585083008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,128,1,fp8,fp8,0,0.3649760087331136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,0,1,float16,fp8,0,0.3697226842244466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,128,1,float16,float16,0,0.2023786703745524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,0,1,float16,fp8,0,0.3778560161590576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,0,1,float16,float16,0,0.20179200172424316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,128,1,float16,fp8,0,0.19808000326156616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,128,1,float16,float16,0,0.3819040060043335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,128,1,fp8,fp8,0,0.19433599710464478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,0,1,float16,fp8,0,0.19926400979359946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,0,1,fp8,fp8,0,0.19400533040364584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,128,1,float16,float16,0,0.1904319922129313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,0,1,float16,float16,0,0.1902773380279541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,128,1,float16,fp8,0,0.18948266903559366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,128,1,fp8,fp8,0,0.17675199111302695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,0,1,float16,fp8,0,0.18997865915298462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,0,1,fp8,fp8,0,0.17649600903193155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,128,1,float16,float16,0,0.19351466496785483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,0,1,float16,float16,0,0.19476799170176187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,128,1,float16,fp8,0,0.1916266679763794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,128,1,fp8,fp8,0,0.17519466082255045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,0,1,float16,fp8,0,0.19246933857599893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,0,1,fp8,fp8,0,0.17292799552281699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,0,1,fp8,fp8,0,0.3625013430913289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,0,1,float16,float16,0,0.19886932770411173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,128,1,float16,fp8,0,0.19592533508936563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,128,1,fp8,fp8,0,0.18531733751296997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,0,1,float16,fp8,0,0.19775466124216715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,0,1,fp8,fp8,0,0.1846026579538981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,128,1,float16,float16,0,0.19387733936309814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,0,1,float16,float16,0,0.19459199905395508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,128,1,fp8,fp8,0,0.18684266010920206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,0,1,float16,fp8,0,0.19290665785471597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,0,1,fp8,fp8,0,0.1875093380610148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,128,1,float16,float16,0,0.11190399527549744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,0,1,float16,float16,0,0.11010666688283284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,128,1,float16,float16,0,0.1994826594988505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,128,1,fp8,fp8,0,0.10724266370137532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,0,1,float16,fp8,0,0.11054933071136475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,0,1,fp8,fp8,0,0.10838933785756429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,128,1,float16,float16,0,0.10332799951235454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,0,1,float16,float16,0,0.10469866792360942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,128,1,float16,fp8,0,0.10105066498120625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,128,1,fp8,fp8,0,0.0958720048268636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,128,1,float16,fp8,0,0.19136534134546915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,0,1,fp8,fp8,0,0.09493866562843323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,128,1,float16,float16,0,0.10538132985432942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,0,1,float16,float16,0,0.10414399703343709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,128,1,float16,fp8,0,0.10366400082906087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,128,1,fp8,fp8,0,0.09371733665466309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,0,1,float16,fp8,0,0.10565333565076192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,128,1,float16,fp8,0,0.1104800005753835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,128,1,float16,float16,0,0.10703999797503154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,0,1,float16,float16,0,0.1069493293762207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,128,1,float16,fp8,0,0.10532800356547038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,128,1,fp8,fp8,0,0.10113599896430969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,0,1,float16,fp8,0,0.10479467113812764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,0,1,fp8,fp8,0,0.10152533650398254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,128,1,float16,float16,0,0.1055413285891215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,0,1,float16,float16,0,0.10574932893117268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,128,1,float16,fp8,0,0.1046506663163503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,128,1,fp8,fp8,0,0.10335466265678406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,0,1,float16,fp8,0,0.10409067074457805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,0,1,fp8,fp8,0,0.10353599985440572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,0,1,float16,float16,0,0.06028800209363302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,128,1,float16,fp8,0,0.058277333776156105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,128,1,fp8,fp8,0,0.06017066538333893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,0,1,float16,fp8,0,0.05900266766548157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,0,1,fp8,fp8,0,0.060319999853769936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,128,1,float16,float16,0,0.05836800237496694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,0,1,float16,float16,0,0.0584746648867925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,128,1,float16,fp8,0,0.05831466615200043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,128,1,fp8,fp8,0,0.05395199855168661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,0,1,float16,fp8,0,0.057818666100502014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,0,1,fp8,fp8,0,0.054570664962132774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,128,1,float16,float16,0,0.05874133110046387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,0,1,float16,float16,0,0.058229332168896995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,0,1,float16,fp8,0,0.10187733173370361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,128,1,float16,fp8,0,0.05807999769846598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,128,1,fp8,fp8,0,0.05394133428732554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,0,1,float16,fp8,0,0.057914664347966514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,0,1,fp8,fp8,0,0.05385600030422211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,128,1,float16,float16,0,0.05973866581916809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,0,1,float16,float16,0,0.06039999922116598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,128,1,float16,fp8,0,0.059877331058184304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,128,1,fp8,fp8,0,0.05625066657861074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,0,1,float16,fp8,0,0.060458665092786155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,0,1,fp8,fp8,0,0.05725333094596863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,128,1,float16,float16,0,0.05842133363087972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,0,1,float16,float16,0,0.05807999769846598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,128,1,float16,fp8,0,0.05843733251094818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,128,1,fp8,fp8,0,0.05598400036493937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,0,1,float16,fp8,0,0.0580320010582606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,0,1,fp8,fp8,0,0.05596266686916351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,128,1,float16,float16,0,0.039664000272750854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,0,1,float16,float16,0,0.03949866692225138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,128,1,float16,fp8,0,0.039642666776975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,128,1,fp8,fp8,0,0.03937066594759623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,0,1,float16,fp8,0,0.03939199944337209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,0,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,128,1,float16,float16,0,0.0395359992980957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,0,1,float16,float16,0,0.037578667203585304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,128,1,float16,fp8,0,0.0393653338154157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,128,1,fp8,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,0,1,float16,fp8,0,0.03789866715669632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,0,1,fp8,fp8,0,0.03565333286921183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,128,1,float16,float16,0,0.03976000100374222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,0,1,float16,float16,0,0.03969600051641464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,128,1,float16,fp8,0,0.03937600056330363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,128,1,fp8,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,0,1,float16,fp8,0,0.0377813329299291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,0,1,fp8,fp8,0,0.037434667348861694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,128,1,float16,float16,0,0.039834665755430855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,128,1,float16,float16,0,0.05942933261394501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,0,1,float16,float16,0,0.03979733337958654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,0,1,fp8,fp8,0,0.09501866499582927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,128,1,fp8,fp8,0,0.03789333254098892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,0,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,0,1,fp8,fp8,0,0.03755733370780945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,128,1,float16,float16,0,0.03807999938726425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,0,1,float16,float16,0,0.03948266555865606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,128,1,float16,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,128,1,fp8,fp8,0,0.03793599953254064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,0,1,float16,fp8,0,0.03799466788768768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,0,1,fp8,fp8,0,0.03835733234882355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,128,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,0,1,float16,float16,0,0.027104000250498455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,128,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,128,1,fp8,fp8,0,0.025792000194390614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,0,1,float16,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,128,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,0,1,float16,float16,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,128,1,float16,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,128,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,0,1,float16,fp8,0,0.02571733295917511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,128,1,float16,float16,0,0.025818665822347004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,0,1,float16,float16,0,0.02606933315594991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,128,1,fp8,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,0,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,0,1,fp8,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,128,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,128,1,float16,fp8,0,0.025962665677070618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,128,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,0,1,float16,fp8,0,0.02626666675011317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,128,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,0,1,float16,float16,0,0.025631998976071674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,128,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,128,1,fp8,fp8,0,0.02588266630967458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,0,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,0,1,fp8,fp8,0,0.026426665484905243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,128,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,0,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,128,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,128,1,fp8,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,0,1,float16,float16,0,0.01825599993268649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,128,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,128,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,0,1,float16,float16,0,0.017765333255132038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,128,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,128,1,fp8,fp8,0,0.01850133389234543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,0,1,fp8,fp8,0,0.018112000077962875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,128,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,0,1,float16,float16,0,0.018320000420014065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,128,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,0,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,128,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,0,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,128,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,128,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,128,1,fp8,fp8,0,0.017797333498795826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,0,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,0,1,float16,float16,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,128,1,float16,float16,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,128,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,128,1,fp8,fp8,0,0.01869333287080129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,0,1,fp8,fp8,0,0.015664000064134598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,128,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,128,1,float16,fp8,0,0.039861333866914116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,0,1,float16,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,128,1,float16,float16,0,0.01747200017174085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,128,1,float16,float16,0,0.2660106619199117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,0,1,float16,float16,0,0.26629332701365155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,128,1,float16,fp8,0,0.2646346688270569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,0,1,float16,fp8,0,0.2638933261235555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,0,1,fp8,fp8,0,0.2403306762377421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,128,1,float16,float16,0,0.2671839992205302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,0,1,float16,float16,0,0.2677706678708394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,128,1,float16,fp8,0,0.2639039953549703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,128,1,fp8,fp8,0,0.23688000440597534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,0,1,float16,fp8,0,0.26505066951115924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,0,1,fp8,fp8,0,0.2350026567776998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,128,1,fp8,fp8,0,0.24093866348266602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,0,1,float16,float16,0,0.27007466554641724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,128,1,float16,fp8,0,0.2674986720085144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,128,1,fp8,fp8,0,0.24677334229151407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,0,1,fp8,fp8,0,0.24649065732955933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,128,1,float16,float16,0,0.26659733057022095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,0,1,float16,float16,0,0.2653226653734843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,128,1,float16,fp8,0,0.2659200032552083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,128,1,fp8,fp8,0,0.2483946681022644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,0,1,float16,fp8,0,0.265338659286499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,0,1,fp8,fp8,0,0.2468106746673584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,128,1,float16,float16,0,0.1463573376337687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,0,1,float16,float16,0,0.1451359987258911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,128,1,float16,fp8,0,0.14421866337458292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,128,1,fp8,fp8,0,0.13805866241455078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,0,1,float16,fp8,0,0.1442506710688273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,0,1,fp8,fp8,0,0.1381226678689321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,128,1,float16,float16,0,0.13878400127092996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,0,1,float16,float16,0,0.13949333628018698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,128,1,float16,fp8,0,0.1383039951324463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,128,1,fp8,fp8,0,0.12611732880274454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,0,1,float16,fp8,0,0.13806399703025818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,0,1,fp8,fp8,0,0.12407466769218445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,128,1,float16,float16,0,0.14018666744232178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,0,1,float16,float16,0,0.1398240029811859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,128,1,float16,fp8,0,0.13886933525403342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,128,1,fp8,fp8,0,0.12473600109418233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,0,1,float16,fp8,0,0.13958932956059775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,0,1,fp8,fp8,0,0.12471466263135274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,128,1,float16,float16,0,0.14190933108329773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,0,1,float16,float16,0,0.14243732889493307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,128,1,float16,fp8,0,0.14072533448537192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,128,1,fp8,fp8,0,0.13192533453305563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,128,1,float16,float16,0,0.26930665969848633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,0,1,fp8,fp8,0,0.12989866733551025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,128,1,float16,float16,0,0.1400320033232371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,0,1,float16,float16,0,0.14005866646766663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,128,1,float16,fp8,0,0.14138133327166238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,128,1,fp8,fp8,0,0.13404267032941183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,0,1,float16,fp8,0,0.2667146722475688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,0,1,fp8,fp8,0,0.13409066200256348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,128,1,float16,float16,0,0.07868800063927968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,0,1,float16,float16,0,0.07871466875076294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,128,1,float16,fp8,0,0.07868266602357228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,128,1,fp8,fp8,0,0.07673066854476929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,0,1,float16,fp8,0,0.0786186655362447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,0,1,fp8,fp8,0,0.0766133318344752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,128,1,float16,float16,0,0.07567466795444489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,0,1,float16,float16,0,0.0757120003302892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,128,1,float16,fp8,0,0.07484800120194753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,128,1,fp8,fp8,0,0.06850133339564006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,0,1,float16,fp8,0,0.14122133453687033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,0,1,float16,fp8,0,0.07657599945863088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,0,1,fp8,fp8,0,0.06850666801134746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,128,1,float16,float16,0,0.07656533519426982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,0,1,float16,float16,0,0.07652799785137177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,128,1,float16,fp8,0,0.07662400106589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,128,1,fp8,fp8,0,0.07022400200366974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,0,1,float16,fp8,0,0.07525333265463512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,0,1,float16,fp8,0,0.1422879993915558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,128,1,float16,float16,0,0.0786293347676595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,0,1,float16,float16,0,0.07796266674995422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,128,1,float16,fp8,0,0.07692799965540568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,128,1,fp8,fp8,0,0.07229866584142049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,0,1,float16,fp8,0,0.07725333174069722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,0,1,fp8,fp8,0,0.07189333438873291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,128,1,float16,float16,0,0.07673599819342296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,0,1,float16,float16,0,0.0761706680059433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,128,1,float16,fp8,0,0.07656533519426982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,128,1,fp8,fp8,0,0.07233066856861115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,0,1,float16,fp8,0,0.07656533519426982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,0,1,fp8,fp8,0,0.0730506678422292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,128,1,float16,float16,0,0.04770133395989736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,0,1,float16,float16,0,0.04795733094215393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,128,1,float16,fp8,0,0.04804799954096476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,0,1,fp8,fp8,0,0.07045866549015045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,0,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,0,1,fp8,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,0,1,float16,float16,0,0.04553066690762838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,128,1,float16,fp8,0,0.04601066807905833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,128,1,fp8,fp8,0,0.041877334316571556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,0,1,float16,fp8,0,0.045696000258127846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,0,1,fp8,fp8,0,0.041759997606277466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,128,1,float16,float16,0,0.04587733248869578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,0,1,float16,float16,0,0.04604800045490265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,128,1,float16,fp8,0,0.045968001087506614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,128,1,fp8,fp8,0,0.04359466830889384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,0,1,float16,fp8,0,0.04563733438650767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,0,1,fp8,fp8,0,0.044213334719340004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,128,1,float16,float16,0,0.04775466521581014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,0,1,float16,float16,0,0.047925333182017006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,128,1,float16,fp8,0,0.04804799954096476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,128,1,fp8,fp8,0,0.04383466641108195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,0,1,float16,fp8,0,0.04775999983151754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,0,1,fp8,fp8,0,0.04390400151411692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,128,1,float16,float16,0,0.04584000011285146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,0,1,float16,float16,0,0.04604800045490265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,128,1,float16,fp8,0,0.04585599899291992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,128,1,fp8,fp8,0,0.04417066772778829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,0,1,float16,fp8,0,0.046053335070610046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,0,1,fp8,fp8,0,0.0455626646677653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,128,1,float16,float16,0,0.031680000325044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,0,1,float16,float16,0,0.03161599983771642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,128,1,float16,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,128,1,fp8,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,0,1,fp8,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,128,1,float16,float16,0,0.03136533250411352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,0,1,float16,float16,0,0.03128000100453695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,128,1,float16,fp8,0,0.030282666285832722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,128,1,fp8,fp8,0,0.029482667644818623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,0,1,float16,fp8,0,0.02992533395687739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,0,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,128,1,float16,float16,0,0.03134933362404505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,0,1,float16,float16,0,0.03148799886306127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,128,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,128,1,fp8,fp8,0,0.029306667546431225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,0,1,float16,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,0,1,fp8,fp8,0,0.029637334247430164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,128,1,float16,float16,0,0.031301334500312805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,128,1,float16,fp8,0,0.031285333136717476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,128,1,fp8,fp8,0,0.029690665503342945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,0,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,0,1,fp8,fp8,0,0.031445334355036415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,128,1,float16,float16,0,0.03151999910672506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,0,1,float16,float16,0,0.03136000037193298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,128,1,float16,fp8,0,0.03162666658560435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,128,1,fp8,fp8,0,0.031109333038330078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,0,1,float16,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,0,1,fp8,fp8,0,0.0316746657093366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,128,1,float16,float16,0,0.04665066798528036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,128,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,128,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,128,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,0,1,float16,float16,0,0.021498667697111767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,128,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,128,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,128,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,128,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,128,1,fp8,fp8,0,0.020917333662509918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,0,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,128,1,fp8,fp8,0,0.04557866851488749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,128,1,float16,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,128,1,float16,float16,0,0.022416000564893086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,0,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,128,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,0,1,float16,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,0,1,fp8,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,0,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,128,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,128,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,128,1,fp8,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,128,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,128,1,float16,float16,0,0.01657066618402799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,128,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,128,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,128,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,128,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,128,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,128,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,128,1,float16,float16,0,0.016528000434239704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,128,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,128,1,float16,float16,0,0.21188799540201822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,0,1,float16,float16,0,0.2100480000178019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,128,1,float16,fp8,0,0.20992000897725424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,0,1,float16,fp8,0,0.21106666326522827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,0,1,fp8,fp8,0,0.18862932920455933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,128,1,float16,float16,0,0.21197332938512167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,0,1,float16,float16,0,0.2118826707204183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,128,1,float16,fp8,0,0.21086400747299194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,128,1,fp8,fp8,0,0.1893493334452311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,0,1,float16,fp8,0,0.21025067567825317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,0,1,fp8,fp8,0,0.18921067317326865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,128,1,float16,float16,0,0.21199466784795126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,0,1,float16,float16,0,0.214464008808136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,128,1,float16,fp8,0,0.21254400412241617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,128,1,fp8,fp8,0,0.1950506567955017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,0,1,float16,fp8,0,0.2129813234011332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,128,1,float16,float16,0,0.2119413415590922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,0,1,float16,float16,0,0.21246933937072754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,128,1,fp8,fp8,0,0.19493865966796875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,128,1,fp8,fp8,0,0.18784000476201376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,0,1,float16,fp8,0,0.2128480076789856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,0,1,fp8,fp8,0,0.19591999053955078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,128,1,float16,float16,0,0.11466667056083679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,0,1,float16,float16,0,0.11365333199501038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,128,1,float16,fp8,0,0.11372799674669902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,128,1,fp8,fp8,0,0.10732799768447876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,0,1,float16,fp8,0,0.113237331310908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,128,1,float16,fp8,0,0.2121973236401876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,128,1,float16,float16,0,0.10961600144704182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,0,1,float16,float16,0,0.10959999759991963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,128,1,float16,fp8,0,0.10943999886512756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,128,1,fp8,fp8,0,0.10012267033259074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,0,1,float16,fp8,0,0.11124266187349956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,0,1,fp8,fp8,0,0.1011253297328949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,128,1,float16,float16,0,0.11155733466148376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,0,1,float16,float16,0,0.11152533690134685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,128,1,float16,fp8,0,0.10965333382288615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,128,1,fp8,fp8,0,0.10140800476074219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,0,1,float16,fp8,0,0.10950932900110881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,0,1,fp8,fp8,0,0.10094933708508809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,0,1,fp8,fp8,0,0.10752000411351521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,0,1,float16,float16,0,0.11252267162005107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,128,1,float16,fp8,0,0.11170132954915364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,128,1,fp8,fp8,0,0.10354666908582051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,0,1,float16,fp8,0,0.11155733466148376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,0,1,fp8,fp8,0,0.10394133130709331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,128,1,float16,float16,0,0.11166933178901672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,0,1,float16,float16,0,0.11143466830253601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,128,1,float16,fp8,0,0.11109333237012227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,128,1,fp8,fp8,0,0.1034346620241801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,0,1,float16,fp8,0,0.10975999633471172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,0,1,fp8,fp8,0,0.10319999853769939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,128,1,float16,float16,0,0.0653546651204427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,0,1,float16,float16,0,0.06427733103434245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,128,1,float16,fp8,0,0.06578133503595988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,128,1,fp8,fp8,0,0.061978667974472046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,0,1,float16,fp8,0,0.06452266871929169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,0,1,fp8,fp8,0,0.062314664324124656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,128,1,float16,float16,0,0.06428800026575725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,0,1,float16,float16,0,0.06459199885527293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,128,1,float16,fp8,0,0.06390400230884552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,128,1,fp8,fp8,0,0.06037333110968272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,0,1,float16,fp8,0,0.06442666550477345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,0,1,fp8,fp8,0,0.06000000238418579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,128,1,float16,float16,0,0.06451199948787689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,0,1,float16,float16,0,0.06431999802589417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,128,1,float16,fp8,0,0.06424533327420552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,128,1,fp8,fp8,0,0.05858666698137919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,0,1,float16,fp8,0,0.06444266438484192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,0,1,fp8,fp8,0,0.060165335734685264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,128,1,float16,float16,0,0.06530133386452992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,0,1,float16,float16,0,0.06613333523273468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,128,1,float16,fp8,0,0.06432533264160156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,128,1,fp8,fp8,0,0.06042666733264923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,0,1,float16,fp8,0,0.06419733166694641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,0,1,fp8,fp8,0,0.06038933495680491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,128,1,float16,float16,0,0.06437333424886067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,0,1,float16,float16,0,0.06442133088906606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,128,1,float16,fp8,0,0.0643146683772405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,128,1,fp8,fp8,0,0.060677334666252136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,0,1,float16,fp8,0,0.06468266745408376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,0,1,fp8,fp8,0,0.062074666221936546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,0,1,float16,float16,0,0.03974399964014689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,128,1,float16,float16,0,0.11180266737937927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,128,1,float16,fp8,0,0.03978666663169861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,128,1,fp8,fp8,0,0.039813332259655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,0,1,float16,fp8,0,0.039317332208156586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,0,1,fp8,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,128,1,float16,float16,0,0.03932266682386398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,0,1,float16,float16,0,0.037962667644023895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,128,1,float16,fp8,0,0.03938133269548416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,128,1,fp8,fp8,0,0.03533866753180822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,0,1,float16,fp8,0,0.039808000127474465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,0,1,fp8,fp8,0,0.03533333291610082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,128,1,float16,float16,0,0.03950933367013931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,0,1,fp8,fp8,0,0.19434666633605957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,128,1,float16,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,128,1,fp8,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,0,1,float16,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,0,1,fp8,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,128,1,float16,float16,0,0.03806933263937632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,0,1,float16,float16,0,0.039605334401130676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,128,1,float16,fp8,0,0.03945599993069967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,128,1,fp8,fp8,0,0.03575466573238373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,0,1,float16,fp8,0,0.03860799968242645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,0,1,fp8,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,128,1,float16,float16,0,0.03957866628964742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,0,1,float16,float16,0,0.03967999915281931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,128,1,float16,fp8,0,0.03972800076007843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,128,1,fp8,fp8,0,0.036933332681655884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,0,1,float16,fp8,0,0.03748800108830134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,0,1,fp8,fp8,0,0.03859733293453852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,128,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,0,1,float16,float16,0,0.038165333370367684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,128,1,float16,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,128,1,float16,float16,0,0.03962666789690653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,0,1,float16,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,128,1,float16,fp8,0,0.027552001178264618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,128,1,fp8,fp8,0,0.02568000058333079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,0,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,128,1,float16,float16,0,0.025637333591779072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,0,1,float16,float16,0,0.025381334125995636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,128,1,float16,fp8,0,0.026208000878492992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,128,1,fp8,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,128,1,float16,float16,0,0.027061333258946735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,0,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,0,1,fp8,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,128,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,0,1,float16,float16,0,0.027221334477265675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,128,1,float16,fp8,0,0.027424000203609467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,128,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,0,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,0,1,fp8,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,128,1,float16,float16,0,0.02552533398071925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,0,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,128,1,float16,fp8,0,0.026346666117509205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,128,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,0,1,float16,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,0,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,128,1,float16,float16,0,0.02088533341884613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,0,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,128,1,float16,fp8,0,0.01966933285196622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,128,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,128,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,128,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,128,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,0,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,128,1,float16,float16,0,0.020784000555674236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,0,1,float16,float16,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,128,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,128,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,0,1,float16,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,0,1,float16,float16,0,0.027855999767780304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,128,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,128,1,fp8,fp8,0,0.025685332715511322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,128,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,0,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,128,1,float16,float16,0,0.01940800001223882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,0,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,128,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,0,1,float16,float16,0,0.019658666104078293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,128,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,0,1,float16,float16,0,0.016186666985352833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,128,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,0,1,float16,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,128,1,float16,float16,0,0.017525333911180496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,128,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,128,1,fp8,fp8,0,0.015685333559910457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,0,1,float16,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,128,1,fp8,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,0,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,0,1,fp8,fp8,0,0.01613866661985715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,128,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,128,1,fp8,fp8,0,0.016565332810084026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,0,1,fp8,fp8,0,0.01632000009218852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,128,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,128,1,fp8,fp8,0,0.015925332903862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,128,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,0,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,128,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,128,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,0,1,float16,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,128,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,128,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,0,1,float16,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,0,1,fp8,fp8,0,0.01590399940808614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,128,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,0,1,float16,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,0,1,fp8,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,128,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,0,1,float16,float16,0,0.016010666886965435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,128,1,fp8,fp8,0,0.01623999948302905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,0,1,fp8,fp8,0,0.016352000335852306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,128,1,float16,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,0,1,float16,fp8,0,0.01609066625436147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,128,1,float16,float16,0,0.18095467487970987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,0,1,float16,float16,0,0.18107734123865762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,128,1,float16,fp8,0,0.1811306675275167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,128,1,fp8,fp8,0,0.1647040049235026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,0,1,float16,fp8,0,0.18119466304779053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,0,1,fp8,fp8,0,0.1646293302377065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,128,1,float16,float16,0,0.18266665935516357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,0,1,float16,float16,0,0.18263999621073404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,128,1,float16,fp8,0,0.18080000082651773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,128,1,fp8,fp8,0,0.1646293302377065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,0,1,float16,fp8,0,0.18101867039998373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,0,1,fp8,fp8,0,0.16491732994715372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,128,1,float16,float16,0,0.18260266383488974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,0,1,float16,float16,0,0.18315200010935465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,128,1,float16,fp8,0,0.18323200941085815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,128,1,fp8,fp8,0,0.16683199008305868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,0,1,float16,fp8,0,0.18098666270573935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,0,1,fp8,fp8,0,0.1686506668726603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,128,1,float16,float16,0,0.18245333433151245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,0,1,float16,float16,0,0.1812480092048645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,128,1,float16,fp8,0,0.18332799275716147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,0,1,float16,fp8,0,0.18149866660435995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,0,1,fp8,fp8,0,0.16877333323160806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,0,1,float16,float16,0,0.09937066833178203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,128,1,float16,fp8,0,0.09910399715105693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,128,1,fp8,fp8,0,0.09294933080673218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,0,1,float16,fp8,0,0.0993386705716451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,0,1,fp8,fp8,0,0.09221866726875305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,128,1,float16,float16,0,0.09776533643404643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,0,1,float16,float16,0,0.0972106655438741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,128,1,float16,fp8,0,0.09836266438166301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,128,1,fp8,fp8,0,0.09099200367927551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,128,1,fp8,fp8,0,0.168613334496816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,128,1,float16,float16,0,0.09925333658854167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,128,1,float16,float16,0,0.09921600421269734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,0,1,float16,float16,0,0.09928533434867859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,128,1,float16,fp8,0,0.09716266393661499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,128,1,fp8,fp8,0,0.08918933073679607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,0,1,float16,fp8,0,0.09941333532333374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,0,1,float16,fp8,0,0.09929600358009338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,0,1,fp8,fp8,0,0.09100266297658284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,0,1,fp8,fp8,0,0.08939733107884724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,0,1,float16,float16,0,0.10011200110117595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,128,1,float16,fp8,0,0.09914132952690125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,128,1,fp8,fp8,0,0.09289066990216573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,0,1,float16,fp8,0,0.09924800197283427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,0,1,fp8,fp8,0,0.09124799569447835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,128,1,float16,float16,0,0.09830400347709656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,0,1,float16,float16,0,0.09923733274141948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,128,1,float16,fp8,0,0.09921066959698994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,128,1,fp8,fp8,0,0.09275199969609578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,0,1,float16,fp8,0,0.09929600358009338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,128,1,float16,float16,0,0.056277334690093994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,0,1,float16,float16,0,0.05799466868241628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,128,1,float16,fp8,0,0.0582826683918635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,128,1,fp8,fp8,0,0.05411200225353241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,0,1,float16,fp8,0,0.057855998476346336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,0,1,fp8,fp8,0,0.05423999826113383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,128,1,float16,float16,0,0.056261335810025535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,0,1,float16,float16,0,0.05630933245023092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,128,1,float16,fp8,0,0.05602666735649109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,128,1,fp8,fp8,0,0.05186133086681366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,0,1,float16,fp8,0,0.05588266750176748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,0,1,fp8,fp8,0,0.05198933184146881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,128,1,float16,float16,0,0.05585599939028422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,0,1,float16,float16,0,0.057333335280418396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,128,1,float16,fp8,0,0.056373332937558494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,128,1,fp8,fp8,0,0.05246399839719137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,0,1,float16,fp8,0,0.0583840012550354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,0,1,fp8,fp8,0,0.09180800120035808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,128,1,float16,float16,0,0.05595199763774872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,0,1,float16,float16,0,0.05842133363087972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,128,1,float16,float16,0,0.09909333785374959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,128,1,fp8,fp8,0,0.05462933580080668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,0,1,fp8,fp8,0,0.054287999868392944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,128,1,float16,float16,0,0.058378666639328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,0,1,float16,float16,0,0.05629333357016245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,128,1,float16,fp8,0,0.05656533439954122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,128,1,fp8,fp8,0,0.05327466626962026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,0,1,fp8,fp8,0,0.0518506666024526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,0,1,float16,fp8,0,0.056474665800730385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,0,1,fp8,fp8,0,0.0524479995171229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,128,1,float16,fp8,0,0.05624000231424967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,0,1,float16,float16,0,0.03569599986076355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,128,1,float16,fp8,0,0.03418133407831192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,128,1,fp8,fp8,0,0.033610666791598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,0,1,float16,fp8,0,0.033914667864640556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,0,1,fp8,fp8,0,0.03387200087308884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,128,1,float16,float16,0,0.03575466573238373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,0,1,float16,float16,0,0.03569599986076355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,128,1,float16,fp8,0,0.03537066777547201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,128,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,0,1,float16,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,0,1,fp8,fp8,0,0.031717332700888314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,128,1,float16,float16,0,0.035029334326585136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,0,1,float16,float16,0,0.03368533402681351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,128,1,float16,fp8,0,0.033333333830038704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,128,1,fp8,fp8,0,0.031727999448776245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,0,1,float16,fp8,0,0.03365333378314972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,0,1,fp8,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,128,1,float16,float16,0,0.033200000723203026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,0,1,float16,float16,0,0.03549333413441976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,128,1,float16,fp8,0,0.033370666205883026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,128,1,fp8,fp8,0,0.033674667278925575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,0,1,float16,fp8,0,0.035274667044480644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,0,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,128,1,float16,float16,0,0.035402665535608925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,0,1,float16,float16,0,0.0337119996547699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,128,1,float16,fp8,0,0.033439998825391136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,128,1,fp8,fp8,0,0.03206400076548258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,0,1,float16,fp8,0,0.033717334270477295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,0,1,fp8,fp8,0,0.033728001018365227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,0,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,128,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,128,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,128,1,float16,float16,0,0.035616000493367515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,0,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,128,1,float16,float16,0,0.024133334557215374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,0,1,float16,float16,0,0.02438933402299881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,128,1,float16,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,128,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,0,1,float16,fp8,0,0.025040000677108765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,0,1,fp8,fp8,0,0.0242399995525678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,0,1,float16,float16,0,0.025583999852339428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,128,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,128,1,fp8,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,0,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,128,1,float16,float16,0,0.025029333929220837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,0,1,float16,float16,0,0.024533333877722423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,128,1,float16,fp8,0,0.025648000339667004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,128,1,fp8,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,0,1,float16,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,0,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,128,1,float16,float16,0,0.024821333587169647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,0,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,128,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,0,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,0,1,float16,fp8,0,0.05819199979305267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,128,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,0,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,128,1,float16,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,128,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,128,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,0,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,128,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,0,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,128,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,0,1,float16,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,128,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,0,1,float16,float16,0,0.018895999838908512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,128,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,0,1,fp8,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,128,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,128,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,128,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,128,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,128,1,float16,float16,0,0.01600533351302147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,128,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,128,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,0,1,float16,float16,0,0.016778666526079178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,0,1,float16,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,0,1,float16,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,0,1,float16,float16,0,0.015861333658297855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,128,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,0,1,float16,float16,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,128,1,float16,float16,0,0.016447999825080235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,128,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,0,1,fp8,fp8,0,0.016271999726692837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,128,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,128,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,128,1,float16,float16,0,0.01588800052801768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,128,1,fp8,fp8,0,0.0164533331990242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,128,1,float16,fp8,0,0.015850666910409927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,128,1,float16,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,128,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,0,0.1567146678765615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,0,0.15651733676592508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,0,0.15685333808263144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,128,1,fp8,fp8,0,0.14316800236701965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,0,0.1564906636873881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,64,0,1,fp8,fp8,0,0.1423413356145223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,0,0.15625066558519998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,0,0.15653333067893982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,0,0.15657599767049155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,128,1,fp8,fp8,0,0.14245866735776266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,0,0.15647466977437338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,64,0,1,fp8,fp8,0,0.14239466190338135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,0,0.1567253371079763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,0,0.1567573348681132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,0,0.15648000439008078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,128,1,fp8,fp8,0,0.1423786679903666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,0,0.1561973293622335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,64,0,1,fp8,fp8,0,0.14244799812634787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,0,0.15654399991035461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,0,0.1572213371594747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,0,0.1566933294137319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,128,1,fp8,fp8,0,0.01603200038274129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,128,1,fp8,fp8,0,0.14246933658917746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,0,0.15799466768900552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,64,0,1,fp8,fp8,0,0.14251200358072916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,0,0.08489599823951721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,128,1,float16,fp8,0,0.0848479966322581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,128,1,fp8,fp8,0,0.07668266693751018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,0,1,fp8,fp8,0,0.0767680009206136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,0,0.08478400111198425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,0,0.08497066299120586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,0,0.0848479966322581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,0,0.08483733733495076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,0,0.08469333251317342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,0,0.08513066172599792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,0,0.08506133159001668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,0,0.084906667470932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,128,1,fp8,fp8,0,0.07893866797288258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,64,128,1,float16,float16,0,0.08506666620572408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,0,1,fp8,fp8,0,0.07778133451938629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,0,1,fp8,fp8,0,0.0767146646976471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,0,0.08493866523106892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,0,0.08509332935015361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,128,1,fp8,fp8,0,0.07682666679223378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,0,0.0848586658636729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,64,128,1,fp8,fp8,0,0.07865066826343536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,0,1,fp8,fp8,0,0.07841066519419353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,0,0.08502399921417236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,0,0.08508800466855367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,0,0.08477333188056946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,128,1,fp8,fp8,0,0.07856533428033192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,0,0.0846666693687439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,64,0,1,fp8,fp8,0,0.0788320004940033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,128,1,float16,float16,0,0.04984533290068308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,0,0.049957334995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,128,1,fp8,fp8,0,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,0,0.049866666396458946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,0,0.0846453309059143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,0,0.050016000866889954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,0,0.05009600023428599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,0,0.08481599887212117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,0,0.05154666801293691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,128,1,fp8,fp8,0,0.04770666857560476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,0,0.04996266464392344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,64,0,1,fp8,fp8,0,0.047872001926104225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,0,0.0499946673711141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,128,1,float16,fp8,0,0.050016000866889954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,0,0.04975466430187225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,128,1,fp8,fp8,0,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,0,0.05064000189304352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,0,1,fp8,fp8,0,0.048021331429481506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,0,0.05012266834576925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,0,0.05017599960168203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,128,1,fp8,fp8,0,0.04593066871166229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,0,0.05013333261013031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,64,0,1,fp8,fp8,0,0.045925334095954895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,64,0,1,fp8,fp8,0,0.045781334241231285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,0,0.04974933465321859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,0,0.05016533533732096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,128,1,fp8,fp8,0,0.04795733094215393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,0,0.050154666105906166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,0,1,fp8,fp8,0,0.0476746658484141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,0,0.049882665276527405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,128,1,float16,fp8,0,0.03311466674009959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,0,0.03156266609827677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,0,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,0,0.03133866687615713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,0,0.03150933235883713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,0,0.031983998914559685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,128,1,fp8,fp8,0,0.031301334500312805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,128,1,float16,float16,0,0.031221332649389904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,0,1,fp8,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,64,128,1,fp8,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,0,0.03192000091075897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,128,1,fp8,fp8,0,0.03189333279927572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,0,0.031557333966096245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,0,1,fp8,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,0,0.03344533344109853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,0,0.03275733441114426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,0,0.03317866722742716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,0,1,fp8,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,0,0.03160000095764796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,0,0.031541332602500916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,0,0.03170666595300039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,128,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,0,0.03324266771475474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,0,1,fp8,fp8,0,0.03124266614516576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,64,128,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,0,0.05160533388455709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,128,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,0,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,0,0.02493866781393687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,128,1,fp8,fp8,0,0.02293333411216736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,64,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,0,0.025029333929220837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,128,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,64,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,0,0.023472001155217487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,0,0.0239680012067159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,128,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,128,1,fp8,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,64,0,1,fp8,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,0,0.023317334552605946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,128,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,64,0,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,128,1,float16,float16,0,0.01809599995613098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,128,1,float16,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,0,0.018965333700180054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,128,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,128,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,64,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,0,0.03329599897066752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,64,128,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,64,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,128,1,fp8,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,0,0.016549333930015564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,0,0.017674667139848072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,128,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,0,0.015626666446526844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,64,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,128,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,0,0.016693333784739178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,128,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,64,0,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,64,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,64,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,128,1,fp8,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,64,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,128,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,64,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,128,1,fp8,fp8,0,0.016176000237464905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,float16,0,1.339743932088216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,fp8,0,1.349199930826823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,128,1,fp8,fp8,0,1.1957013607025146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,float16,0,8.314997355143229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,fp8,0,8.313936233520508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,float16,0,1.364810625712077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,fp8,0,1.3923360506693523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,0,1,fp8,fp8,0,7.520954767862956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,128,1,fp8,fp8,0,1.2185813585917156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,float16,0,1.3725652694702148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,float16,0,8.310314814249674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,fp8,0,1.3822719256083171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,fp8,0,8.33566919962565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,0,1,fp8,fp8,0,7.544362386067708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,128,1,fp8,fp8,0,1.232357343037923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,float16,0,1.4067734082539876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,float16,0,8.34015973409017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,fp8,0,1.4207146962483723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,fp8,0,8.354874928792318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,128,1,fp8,fp8,0,1.277743975321452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,float16,0,8.372512181599935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,float16,0,0.7928000291188558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,0,1,fp8,fp8,0,7.558490753173828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,fp8,0,0.8089173634847006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,fp8,0,8.408730824788412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,float16,0,4.341050783793132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,128,1,fp8,fp8,0,0.7535839875539144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,float16,0,0.7067253589630127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,0,1,fp8,fp8,0,7.622901280721028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,fp8,0,4.368949254353841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,0,1,fp8,fp8,0,3.952986717224121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,128,1,fp8,fp8,0,0.6338293155034384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,float16,0,4.230736096700032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,float16,0,0.7118346691131592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,fp8,0,4.24124813079834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,0,1,fp8,fp8,0,3.8527679443359375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,fp8,0,0.7160000006357828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,128,1,fp8,fp8,0,0.6398346821467081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,fp8,0,0.7112053235371908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,float16,0,4.240133285522461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,float16,0,0.7167680263519287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,fp8,0,0.7237066427866617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,0,1,fp8,fp8,0,3.859487851460775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,fp8,0,4.249946594238281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,float16,0,4.248192151387532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,float16,0,0.7317333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,128,1,fp8,fp8,0,0.6481279929478964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,0,1,fp8,fp8,0,3.860661188761393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,fp8,0,4.262853304545085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,fp8,0,0.7397920290629069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,128,1,fp8,fp8,0,0.6656213204065958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,float16,0,0.4384053150812785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,float16,0,4.267877260843913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,fp8,0,0.4501546621322632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,0,1,fp8,fp8,0,3.8869654337565103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,float16,0,2.26529598236084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,128,1,fp8,fp8,0,0.4149706761042277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,float16,0,0.3988853295644124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,0,1,fp8,fp8,0,2.070144017537435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,fp8,0,4.2885386149088545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,float16,0,2.2156373659769693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,128,1,fp8,fp8,0,0.3760213454564412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,fp8,0,2.2750879923502603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,fp8,0,2.217738628387451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,0,1,fp8,fp8,0,2.021882692972819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,fp8,0,0.40252800782521564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,fp8,0,0.40462398529052734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,128,1,fp8,fp8,0,0.3671199878056844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,float16,0,2.2176693280537925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,float16,0,0.40597331523895264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,0,1,fp8,fp8,0,2.0206185976664224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,float16,0,0.40019198258717853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,fp8,0,0.408245325088501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,128,1,fp8,fp8,0,0.37001065413157147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,float16,0,2.221951961517334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,float16,0,0.4128426710764567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,0,1,fp8,fp8,0,2.027637322743734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,fp8,0,0.42072534561157227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,128,1,fp8,fp8,0,0.37808001041412354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,float16,0,2.231152057647705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,fp8,0,2.224127928415934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,float16,0,0.3124319911003113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,0,1,fp8,fp8,0,2.0339199701944985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,fp8,0,0.31169066826502484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,float16,0,1.2791093190511067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,128,1,fp8,fp8,0,0.2896160085995992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,fp8,0,2.225466728210449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,fp8,0,1.2793760299682617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,fp8,0,2.2419625918070474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,0,1,fp8,fp8,0,1.1642186641693115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,fp8,0,0.3095146616299947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,128,1,fp8,fp8,0,0.2876960039138794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,float16,0,1.2702133655548096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,float16,0,0.3101653258005778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,fp8,0,1.2673973242441814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,fp8,0,0.3102239966392517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,float16,0,1.2718186378479004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,fp8,0,1.2710986932118733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,0,1,fp8,fp8,0,1.1620906988779705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,float16,0,0.3102186719576518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,0,1,fp8,fp8,0,1.165493329366048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,128,1,fp8,fp8,0,0.28753600517908734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,fp8,0,0.3110400040944417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,float16,0,1.2707626819610596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,128,1,fp8,fp8,0,0.2891146739323934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,fp8,0,1.2716586589813232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,0,1,fp8,fp8,0,1.1646880308787029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,float16,0,0.3096959988276164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,fp8,0,0.31037867069244385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,float16,0,1.275765339533488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,128,1,fp8,fp8,0,0.2879146734873454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,float16,0,0.3104213277498881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,fp8,0,1.2758560180664062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,0,1,fp8,fp8,0,1.1634666919708252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,float16,0,1.0040853023529053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,fp8,0,1.0117066701253254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,128,1,fp8,fp8,0,0.8924266497294108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,float16,0,1.0115520159403484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,float16,0,4.90126387278239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,fp8,0,4.900709470113118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,0,1,fp8,fp8,0,4.4328053792317705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,128,1,fp8,fp8,0,0.9059840043385824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,float16,0,4.908597310384114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,fp8,0,1.0217759609222412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,float16,0,1.0213226477305095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,fp8,0,4.910895983378093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,fp8,0,1.0320266882578533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,0,1,fp8,fp8,0,4.447733243306478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,128,1,fp8,fp8,0,0.9194613297780355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,float16,0,1.0462666352589924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,float16,0,4.9248959223429365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,fp8,0,1.0571680068969727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,0,1,fp8,fp8,0,4.459775924682617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,fp8,0,4.93119462331136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,128,1,fp8,fp8,0,0.9500799973805746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,float16,0,0.5978933175404867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,float16,0,4.947109222412109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,fp8,0,0.6114826599756876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,128,1,fp8,fp8,0,0.5580373207728068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,0,1,fp8,fp8,0,4.502021471659343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,fp8,0,4.965189297993978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,float16,0,0.532810648282369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,fp8,0,2.607253392537435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,float16,0,2.6007307370503745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,float16,0,2.520106633504232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,128,1,fp8,fp8,0,0.48023466269175213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,0,1,fp8,fp8,0,2.3694987297058105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,fp8,0,2.5243679682413735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,fp8,0,0.5377813180287679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,fp8,0,0.5426773230234782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,float16,0,2.5255093574523926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,128,1,fp8,fp8,0,0.486410657564799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,0,1,fp8,fp8,0,2.291072050730387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,float16,0,0.5427093505859375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,fp8,0,2.530394713083903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,0,1,fp8,fp8,0,2.29258139928182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,fp8,0,0.547818660736084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,128,1,fp8,fp8,0,0.4911946853001912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,float16,0,0.5368906656901041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,float16,0,2.532677332560221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,float16,0,0.5526666641235352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,fp8,0,0.5603359937667847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,fp8,0,2.538442611694336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,0,1,fp8,fp8,0,2.297210693359375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,128,1,fp8,fp8,0,0.50436798731486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,float16,0,2.5476907094319663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,float16,0,0.3332906762758891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,fp8,0,0.34088532129923504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,float16,0,1.375450611114502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,fp8,0,2.5545973777770996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,128,1,fp8,fp8,0,0.3172960082689921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,float16,0,0.3015039960543315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,fp8,0,1.383461316426595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,fp8,0,0.3021013339360555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,float16,0,1.3341919581095378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,128,1,fp8,fp8,0,0.27730133136113483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,0,1,fp8,fp8,0,2.311274687449137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,float16,0,0.304149329662323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,fp8,0,1.333189328511556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,0,1,fp8,fp8,0,1.218773365020752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,fp8,0,0.304474671681722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,128,1,fp8,fp8,0,0.27932800849278766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,float16,0,1.3359626134236653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,float16,0,0.3078773419062297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,fp8,0,1.3380640347798665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,0,1,fp8,fp8,0,1.2220213413238525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,fp8,0,0.3166559934616089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,float16,0,1.341061274210612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,0,1,fp8,fp8,0,1.2581706841786702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,fp8,0,1.3446399370829265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,0,1,fp8,fp8,0,1.223743995030721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,fp8,0,0.3182826638221741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,float16,0,1.350645383199056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,128,1,fp8,fp8,0,0.2836959958076477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,128,1,fp8,fp8,0,0.2897866765658061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,float16,0,0.2363626758257548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,fp8,0,1.3544267018636067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,float16,0,0.31270400683085126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,float16,0,0.7979839642842611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,128,1,fp8,fp8,0,0.22050132354100546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,fp8,0,0.7979946931203207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,0,1,fp8,fp8,0,0.7279573281606039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,float16,0,0.2339786688486735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,0,1,fp8,fp8,0,1.2322719891866047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,fp8,0,0.23635733127593994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,fp8,0,0.23423999547958374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,128,1,fp8,fp8,0,0.21825599670410156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,fp8,0,0.7911307017008463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,0,1,fp8,fp8,0,0.7258026599884033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,float16,0,0.23321600755055746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,fp8,0,0.23278933763504028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,float16,0,0.7914453347524008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,float16,0,0.789626677831014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,128,1,fp8,fp8,0,0.21786133448282877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,float16,0,0.23441600799560547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,0,1,fp8,fp8,0,0.7261066436767578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,fp8,0,0.23271467288335165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,float16,0,0.7935787041982015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,128,1,fp8,fp8,0,0.21795733769734701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,float16,0,0.23441600799560547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,fp8,0,0.793557325998942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,fp8,0,0.23477333784103394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,float16,0,0.794426679611206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,fp8,0,0.7911146481831869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,128,1,fp8,fp8,0,0.22195732593536377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,fp8,0,0.7966453234354655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,0,1,fp8,fp8,0,0.7280480066935221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,float16,0,0.8379626274108887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,0,1,fp8,fp8,0,0.7272000312805176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,128,1,fp8,fp8,0,0.7454079786936442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,float16,0,3.5344107945760093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,fp8,0,0.8446986675262451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,0,1,fp8,fp8,0,3.198645273844401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,fp8,0,3.5340000788370767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,fp8,0,0.8513226509094238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,128,1,fp8,fp8,0,0.7550826867421468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,float16,0,0.844042698542277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,float16,0,3.5347572962443032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,float16,0,0.8503626982371012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,0,1,fp8,fp8,0,3.202218691507975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,fp8,0,0.8592053254445394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,128,1,fp8,fp8,0,0.7651039759318033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,float16,0,3.544645309448242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,fp8,0,3.5475521087646484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,float16,0,0.8698240121205648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,0,1,fp8,fp8,0,3.2133919397989907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,fp8,0,0.880394697189331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,128,1,fp8,fp8,0,0.7884639898935953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,float16,0,3.5751094818115234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,fp8,0,3.5611359278361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,fp8,0,3.5782667795817056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,0,1,fp8,fp8,0,3.23747189839681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,fp8,0,0.5135466655095419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,128,1,fp8,fp8,0,0.46621867020924884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,float16,0,0.5009013414382935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,fp8,0,1.899829387664795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,0,1,fp8,fp8,0,1.7224799791971843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,float16,0,0.44731199741363525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,fp8,0,0.4502026637395223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,float16,0,1.8921386400858562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,float16,0,1.8243999481201172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,float16,0,0.451909343401591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,fp8,0,1.8270667394002278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,0,1,fp8,fp8,0,1.6590879758199055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,fp8,0,0.4554400046666463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,128,1,fp8,fp8,0,0.4063466787338257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,float16,0,1.827397346496582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,float16,0,0.4540426731109619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,fp8,0,1.8340373039245605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,0,1,fp8,fp8,0,1.6639946301778157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,fp8,0,0.45982933044433594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,float16,0,1.8354934056599934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,fp8,0,1.83951997756958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,0,1,fp8,fp8,0,1.668384075164795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,float16,0,0.46373867988586426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,128,1,fp8,fp8,0,0.4124533335367839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,fp8,0,0.4697386821111043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,128,1,fp8,fp8,0,0.40454399585723877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,float16,0,1.8449172973632812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,float16,0,0.2813226580619812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,fp8,0,1.8524053891499836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,fp8,0,0.28621866305669147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,128,1,fp8,fp8,0,0.2653226653734843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,128,1,fp8,fp8,0,0.42285335063934326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,fp8,0,1.0138133366902669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,0,1,fp8,fp8,0,0.9248906771341959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,float16,0,0.25061333179473877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,fp8,0,0.25124800205230713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,float16,0,0.9740426540374756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,128,1,fp8,fp8,0,0.23270932833353677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,fp8,0,0.9774666627248129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,0,1,fp8,fp8,0,0.8916640281677246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,float16,0,0.25197333097457886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,0,1,fp8,fp8,0,1.6782026290893555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,float16,0,0.9757866859436035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,float16,0,1.007482687632243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,fp8,0,0.9793012936909994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,0,1,fp8,fp8,0,0.8930239677429199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,float16,0,0.25697600841522217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,fp8,0,0.2590986688931783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,fp8,0,0.2528266708056132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,128,1,fp8,fp8,0,0.2374346653620402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,128,1,fp8,fp8,0,0.23456533749898276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,fp8,0,0.9808639685312907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,0,1,fp8,fp8,0,0.8970399697621664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,float16,0,0.9812640349070231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,fp8,0,0.2664960026741028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,128,1,fp8,fp8,0,0.2441706657409668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,float16,0,0.26099199056625366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,fp8,0,0.990991989771525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,float16,0,0.985701322555542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,float16,0,0.20140800873438516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,fp8,0,0.20152000586191812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,float16,0,0.5991093317667643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,128,1,fp8,fp8,0,0.18804800510406494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,fp8,0,0.5975786844889323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,0,1,fp8,fp8,0,0.5480800072352091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,fp8,0,0.19946666558583578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,128,1,fp8,fp8,0,0.18532800674438477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,0,1,fp8,fp8,0,0.9023253122965494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,fp8,0,0.5929333368937174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,0,1,fp8,fp8,0,0.5440320173899332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,float16,0,0.19965867201487222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,float16,0,0.5919040044148763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,128,1,fp8,fp8,0,0.18382400274276733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,fp8,0,0.5934240023295084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,0,1,fp8,fp8,0,0.5437920093536377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,float16,0,0.19932266076405844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,fp8,0,0.1997386614481608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,fp8,0,0.20112532377243042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,128,1,fp8,fp8,0,0.18717867136001587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,fp8,0,0.5950080156326294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,0,1,fp8,fp8,0,0.5463093519210815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,float16,0,0.20030933618545532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,float16,0,0.199455996354421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,float16,0,0.5933599869410197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,fp8,0,0.199727992216746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,float16,0,0.5966773430506388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,128,1,fp8,fp8,0,0.1853920022646586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,fp8,0,0.5948053201039633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,0,1,fp8,fp8,0,0.5621066490809122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,float16,0,1.3061493237813313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,fp8,0,1.314031998316447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,float16,0,0.5916159947713217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,128,1,fp8,fp8,0,1.1605546474456787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,float16,0,1.3272213141123455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,float16,0,4.680368105570476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,fp8,0,1.3387786547342937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,fp8,0,4.680858612060547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,128,1,fp8,fp8,0,1.1862986882527669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,float16,0,4.7061812082926435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,0,1,fp8,fp8,0,4.219754536946614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,float16,0,1.3399573961893718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,0,1,fp8,fp8,0,4.25059191385905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,fp8,0,1.3496373494466145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,128,1,fp8,fp8,0,1.2005279858907063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,float16,0,4.711813290913899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,fp8,0,4.711615880330403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,float16,0,1.3758506774902344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,fp8,0,1.387056032816569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,fp8,0,4.728405316670735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,128,1,fp8,fp8,0,1.247477372487386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,float16,0,4.761232058207194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,float16,0,0.7609919706980387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,0,1,fp8,fp8,0,4.266053199768066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,fp8,0,0.7747840086619059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,fp8,0,4.772800127665202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,128,1,fp8,fp8,0,0.7073120276133219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,0,1,fp8,fp8,0,4.306026776631673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,float16,0,0.6751999855041504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,fp8,0,2.5108960469563804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,0,1,fp8,fp8,0,2.2621493339538574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,fp8,0,0.6783040364583334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,float16,0,2.48743470509847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,float16,0,2.38154665629069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,fp8,0,2.406485398610433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,float16,0,0.6771999994913737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,128,1,fp8,fp8,0,0.6004480123519897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,fp8,0,0.6839839617411295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,128,1,fp8,fp8,0,0.6067999998728434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,float16,0,2.4074986775716147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,0,1,fp8,fp8,0,2.153055985768636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,float16,0,0.6826879978179932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,fp8,0,2.3938345909118652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,0,1,fp8,fp8,0,2.164250691731771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,fp8,0,0.6895253658294678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,float16,0,2.3967466354370117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,fp8,0,2.407130718231201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,128,1,fp8,fp8,0,0.6172373294830322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,fp8,0,0.7069760163625082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,float16,0,2.419285297393799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,128,1,fp8,fp8,0,0.6331253449122111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,float16,0,0.6990986665089926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,float16,0,0.41319998105367023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,fp8,0,2.427509307861328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,0,1,fp8,fp8,0,2.1866559982299805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,fp8,0,0.4145653247833252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,128,1,fp8,fp8,0,0.37909332911173504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,0,1,fp8,fp8,0,2.1722559928894043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,float16,0,0.36026668548583984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,float16,0,1.2918026447296143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,float16,0,1.2431146303812664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,fp8,0,1.3025920391082764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,128,1,fp8,fp8,0,0.32715733846028644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,fp8,0,0.36340800921122235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,fp8,0,1.241477330525716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,0,1,fp8,fp8,0,1.126197338104248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,0,1,fp8,fp8,0,1.1818400224049885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,fp8,0,0.36742933591206867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,float16,0,1.24345064163208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,128,1,fp8,fp8,0,0.3305013378461202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,float16,0,0.368064006169637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,fp8,0,1.2456586360931396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,0,1,fp8,fp8,0,1.13155730565389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,fp8,0,0.3716213305791219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,float16,0,1.247770627339681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,fp8,0,1.2550186316172283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,0,1,fp8,fp8,0,1.1388533115386963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,float16,0,0.3741066853205363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,float16,0,0.3636480172475179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,fp8,0,0.37995731830596924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,128,1,fp8,fp8,0,0.34335466225941974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,fp8,0,1.2651840051015217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,0,1,fp8,fp8,0,1.1425440311431885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,float16,0,0.23027199506759644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,float16,0,0.702677329381307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,128,1,fp8,fp8,0,0.21795733769734701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,128,1,fp8,fp8,0,0.33645331859588623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,fp8,0,0.703226645787557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,0,1,fp8,fp8,0,0.6437386671702067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,float16,0,0.2078346610069275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,fp8,0,0.20752533276875815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,fp8,0,0.23602133989334106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,float16,0,1.2626187006632488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,0,1,fp8,fp8,0,0.6149493455886841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,float16,0,0.20795732736587524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,float16,0,0.672106663386027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,128,1,fp8,fp8,0,0.1916960080464681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,128,1,fp8,fp8,0,0.19152534008026123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,fp8,0,0.6713013648986816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,fp8,0,0.6738186677296957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,0,1,fp8,fp8,0,0.6159146626790365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,float16,0,0.20839466651280722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,fp8,0,0.21076265970865884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,float16,0,0.6760906378428141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,128,1,fp8,fp8,0,0.20094400644302368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,float16,0,0.6725333531697592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,fp8,0,0.6829919815063477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,0,1,fp8,fp8,0,0.6204213301340739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,fp8,0,0.21609600385030112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,128,1,fp8,fp8,0,0.20106132825215658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,fp8,0,0.6852853298187256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,fp8,0,0.2080906629562378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,float16,0,0.166485329469045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,0,1,fp8,fp8,0,0.6256320079167684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,fp8,0,0.16679465770721436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,float16,0,0.6815946896870931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,128,1,fp8,fp8,0,0.15588266650835672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,fp8,0,0.4265386660893758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,0,1,fp8,fp8,0,0.3917386531829834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,float16,0,0.1625546713670095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,float16,0,0.42263468106587726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,128,1,fp8,fp8,0,0.15244799852371216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,float16,0,0.21420800685882568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,fp8,0,0.422815998395284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,0,1,fp8,fp8,0,0.38814934094746906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,float16,0,0.16292267044385275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,float16,0,0.42185068130493164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,128,1,fp8,fp8,0,0.15037332971890768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,fp8,0,0.16462399562199911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,0,1,fp8,fp8,0,0.38834134737650555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,float16,0,0.4261653423309326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,float16,0,0.4220159848531087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,fp8,0,0.16273066401481628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,fp8,0,0.16296000281969705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,128,1,fp8,fp8,0,0.1523413360118866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,fp8,0,0.4235946734746297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,0,1,fp8,fp8,0,0.3880053361256917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,float16,0,0.16478932897249857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,float16,0,0.42420800526936847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,fp8,0,0.16659733653068542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,float16,0,0.16299200057983398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,128,1,fp8,fp8,0,0.15245866775512695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,fp8,0,0.42423466841379803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,0,1,fp8,fp8,0,0.38995734850565594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,float16,0,0.9772693316141764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,fp8,0,0.9850719769795736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,128,1,fp8,fp8,0,0.8654507001241049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,float16,0,2.8357226053873696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,float16,0,0.9878239631652832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,fp8,0,2.8438507715861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,0,1,fp8,fp8,0,2.5564746856689453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,fp8,0,0.9954720338185629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,128,1,fp8,fp8,0,0.8952319622039795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,float16,0,2.8458452224731445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,fp8,0,0.4212640126546224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,float16,0,0.9983519713083903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,0,1,fp8,fp8,0,2.571290651957194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,fp8,0,1.0049493312835693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,float16,0,2.8623040517171225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,fp8,0,2.85588804880778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,fp8,0,2.8671627044677734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,128,1,fp8,fp8,0,0.8939200242360433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,float16,0,1.0196159680684407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,0,1,fp8,fp8,0,2.582575956980387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,fp8,0,1.0306933720906575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,128,1,fp8,fp8,0,0.9259733359018961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,float16,0,2.8926827112833657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,float16,0,0.5746399958928426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,fp8,0,2.8974507649739585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,fp8,0,0.5846399863560995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,float16,0,1.5332852999369304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,128,1,fp8,fp8,0,0.5323413213094076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,float16,0,0.5066933234532675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,0,1,fp8,fp8,0,2.616624037424723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,0,1,fp8,fp8,0,1.394752025604248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,fp8,0,0.5121013323465983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,float16,0,1.4555199940999348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,fp8,0,1.4631199836730957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,0,1,fp8,fp8,0,1.316981315612793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,float16,0,0.5242400169372559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,128,1,fp8,fp8,0,0.45558400948842365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,float16,0,1.4619305928548176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,128,1,fp8,fp8,0,0.4598933458328247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,fp8,0,1.5429333051045735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,0,1,fp8,fp8,0,1.3209599653879802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,float16,0,0.5187306801478068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,fp8,0,0.5153013467788696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,fp8,0,0.5236853361129761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,float16,0,1.4682025909423828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,fp8,0,1.467408021291097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,0,1,fp8,fp8,0,1.3281013170878093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,float16,0,0.529584010442098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,fp8,0,0.5362506707509359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,float16,0,1.4817546208699544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,128,1,fp8,fp8,0,0.4657333294550578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,128,1,fp8,fp8,0,0.4802079995473226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,float16,0,0.30848000446955365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,fp8,0,1.4722293217976887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,fp8,0,1.487888018290202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,fp8,0,0.31482134262720746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,float16,0,0.8058506647745768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,fp8,0,0.8125226497650146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,0,1,fp8,fp8,0,0.7401119867960612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,float16,0,0.2730666597684224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,float16,0,0.7662560145060221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,128,1,fp8,fp8,0,0.25036267439524335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,0,1,fp8,fp8,0,1.3389652570088704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,128,1,fp8,fp8,0,0.28860267003377277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,fp8,0,0.768234650293986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,0,1,fp8,fp8,0,0.6975413163503011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,float16,0,0.28016533454259235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,fp8,0,0.27820799748102826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,128,1,fp8,fp8,0,0.2521013418833415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,float16,0,0.7692106564839681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,float16,0,0.2815200090408325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,fp8,0,0.7705653508504232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,fp8,0,0.2809813419977824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,float16,0,0.7747999827067057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,128,1,fp8,fp8,0,0.25672000646591187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,fp8,0,0.7775359948476156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,0,1,fp8,fp8,0,0.7047786712646484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,float16,0,0.28565333286921185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,fp8,0,0.28993600606918335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,0,1,fp8,fp8,0,0.7004799842834473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,128,1,fp8,fp8,0,0.26312534014383954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,fp8,0,0.7833013534545898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,0,1,fp8,fp8,0,0.7118026415506998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,float16,0,0.17532267173131308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,fp8,0,0.18033599853515625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,128,1,fp8,fp8,0,0.1672053337097168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,float16,0,0.7810666561126709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,fp8,0,0.4490506649017334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,float16,0,0.15625066558519998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,float16,0,0.42950932184855145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,fp8,0,0.156741331020991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,float16,0,0.4468959967295329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,fp8,0,0.42423466841379803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,fp8,0,0.2763413389523824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,0,1,fp8,fp8,0,0.3856480121612549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,float16,0,0.15762133399645487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,0,1,fp8,fp8,0,0.4118880033493042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,fp8,0,0.1564586659272512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,128,1,fp8,fp8,0,0.1430506706237793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,fp8,0,0.424885352452596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,0,1,fp8,fp8,0,0.3858986695607503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,float16,0,0.15667200088500977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,float16,0,0.4248533248901367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,fp8,0,0.15871999661127725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,128,1,fp8,fp8,0,0.14813866217931113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,fp8,0,0.42766400178273517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,0,1,fp8,fp8,0,0.3920053243637085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,float16,0,0.16132799784342447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,float16,0,0.42610132694244385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,128,1,fp8,fp8,0,0.15289066235224405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,fp8,0,0.43250131607055664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,0,1,fp8,fp8,0,0.39793598651885986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,float16,0,0.12423466642697652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,float16,0,0.28331732749938965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,fp8,0,0.1628266672293345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,128,1,fp8,fp8,0,0.11923733353614807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,fp8,0,0.28039467334747314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,128,1,fp8,fp8,0,0.14226133624712625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,0,1,fp8,fp8,0,0.2590506672859192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,float16,0,0.12221866846084595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,float16,0,0.27874133984247845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,fp8,0,0.12384000420570374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,128,1,fp8,fp8,0,0.116047998269399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,fp8,0,0.27930667002995807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,0,1,fp8,fp8,0,0.25700799624125165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,float16,0,0.12267200152079265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,float16,0,0.2783733407656352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,fp8,0,0.12367467085520427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,128,1,fp8,fp8,0,0.11740266283353169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,fp8,0,0.27773867050806683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,0,1,fp8,fp8,0,0.2569440007209778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,float16,0,0.1225440005461375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,float16,0,0.278383990128835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,128,1,fp8,fp8,0,0.11619200309117635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,fp8,0,0.2796106735865275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,0,1,fp8,fp8,0,0.2569653391838074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,float16,0,0.1237333317597707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,float16,0,0.2824479937553406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,fp8,0,0.12370666861534119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,fp8,0,0.12350400288899739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,128,1,fp8,fp8,0,0.11564266681671143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,fp8,0,0.27937066555023193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,0,1,fp8,fp8,0,0.2573759953180949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,float16,0,0.4306986729303996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,float16,0,1.29204265276591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,fp8,0,1.3002080122629802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,128,1,fp8,fp8,0,1.1438240210215251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,float16,0,2.848853429158529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,fp8,0,0.12363200386365254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,float16,0,1.312986691792806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,fp8,0,2.862709363301595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,0,1,fp8,fp8,0,2.552010695139567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,fp8,0,1.321343978246053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,128,1,fp8,fp8,0,1.1689759890238445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,float16,0,2.871034622192383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,float16,0,1.3233493169148762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,fp8,0,2.8842185338338218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,0,1,fp8,fp8,0,2.5791145960489907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,fp8,0,1.3338452974955242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,128,1,fp8,fp8,0,1.1836586793263753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,float16,0,1.3651413917541504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,fp8,0,2.894357363382975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,0,1,fp8,fp8,0,2.5950719515482583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,float16,0,2.885626792907715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,fp8,0,1.3728747367858887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,128,1,fp8,fp8,0,1.2278239727020264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,float16,0,2.9334452946980796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,float16,0,0.7460853258768717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,fp8,0,0.7572906812032064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,0,1,fp8,fp8,0,2.639647960662842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,128,1,fp8,fp8,0,0.6881972948710123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,fp8,0,1.5574026107788086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,0,1,fp8,fp8,0,1.409989356994629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,float16,0,0.6542400121688843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,fp8,0,2.9360478719075522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,float16,0,1.5478986104329426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,fp8,0,0.6601226727167765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,float16,0,1.449893315633138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,float16,0,0.661850651105245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,fp8,0,1.4468000729878743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,0,1,fp8,fp8,0,1.3000373045603435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,fp8,0,0.6671573321024576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,128,1,fp8,fp8,0,0.5906933148701986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,float16,0,1.4551466306050618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,128,1,fp8,fp8,0,0.5825813213984171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,float16,0,0.6670506795247396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,fp8,0,1.4583892822265625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,fp8,0,0.672981341679891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,float16,0,1.460645357767741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,fp8,0,1.4691626230875652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,0,1,fp8,fp8,0,1.3146986961364746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,0,1,fp8,fp8,0,1.3056053320566814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,float16,0,0.6837493578592936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,128,1,fp8,fp8,0,0.5966720183690389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,fp8,0,0.6890347003936768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,fp8,0,1.4851093292236328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,float16,0,0.3901120026906331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,float16,0,0.8011946678161621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,fp8,0,0.39766399065653485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,128,1,fp8,fp8,0,0.36130666732788086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,0,1,fp8,fp8,0,1.3326133092244465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,fp8,0,0.8093067010243734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,0,1,fp8,fp8,0,0.7305066585540771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,float16,0,0.3433653513590495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,float16,0,1.4766613642374675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,float16,0,0.7517440319061279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,fp8,0,0.3562186559041341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,128,1,fp8,fp8,0,0.31040533383687335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,float16,0,0.34514665603637695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,float16,0,0.7533120314280192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,fp8,0,0.3550399939219157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,128,1,fp8,fp8,0,0.6172213157018026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,128,1,fp8,fp8,0,0.31513067086537677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,fp8,0,0.7569066683451334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,0,1,fp8,fp8,0,0.6826079686482748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,float16,0,0.3494400183359782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,float16,0,0.7592693169911703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,fp8,0,0.755232016245524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,128,1,fp8,fp8,0,0.3184746702512105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,fp8,0,0.3561600049336751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,fp8,0,0.7629919846852621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,0,1,fp8,fp8,0,0.687391996383667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,float16,0,0.363103985786438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,float16,0,0.7678933143615723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,0,1,fp8,fp8,0,0.6791413625081381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,128,1,fp8,fp8,0,0.3262773354848226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,fp8,0,0.7727039655049642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,0,1,fp8,fp8,0,0.7009812990824381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,float16,0,0.2118933399518331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,fp8,0,0.21599467595418295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,float16,0,0.42951468626658124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,fp8,0,0.43350398540496826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,0,1,fp8,fp8,0,0.40572798252105713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,fp8,0,0.36376531918843585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,float16,0,0.4004586537679036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,128,1,fp8,fp8,0,0.17247466246287027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,fp8,0,0.39981333414713544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,0,1,fp8,fp8,0,0.367792010307312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,float16,0,0.18658665815989176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,float16,0,0.18408532937367758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,fp8,0,0.18557333946228027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,128,1,fp8,fp8,0,0.1728960076967875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,fp8,0,0.40434134006500244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,128,1,fp8,fp8,0,0.19934399922688803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,float16,0,0.18931732575098673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,float16,0,0.40223999818166095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,fp8,0,0.1885653336842855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,128,1,fp8,fp8,0,0.17543999354044595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,fp8,0,0.4070880015691121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,0,1,fp8,fp8,0,0.3705226580301921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,float16,0,0.1954186757405599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,float16,0,0.40556267897288006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,fp8,0,0.19181867440541586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,128,1,fp8,fp8,0,0.18157867590586343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,fp8,0,0.41658135255177814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,0,1,fp8,fp8,0,0.37273601690928143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,float16,0,0.12287466724713643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,float16,0,0.24435732762018839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,float16,0,0.41210134824117023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,fp8,0,0.12628799676895142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,fp8,0,0.19964265823364258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,fp8,0,0.24614399671554565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,0,1,fp8,fp8,0,0.2294399936993917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,float16,0,0.11102933684984843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,float16,0,0.23100266853968301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,fp8,0,0.1113866666952769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,128,1,fp8,fp8,0,0.099263995885849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,0,1,fp8,fp8,0,0.3785119851430257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,0,1,fp8,fp8,0,0.2096959948539734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,float16,0,0.11115733782450359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,float16,0,0.23194666703542074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,128,1,fp8,fp8,0,0.10122666756312053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,128,1,fp8,fp8,0,0.119759996732076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,fp8,0,0.2323840061823527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,0,1,fp8,fp8,0,0.21009065707524618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,float16,0,0.11154133081436157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,float16,0,0.23214934269587198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,128,1,fp8,fp8,0,0.10153067111968994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,fp8,0,0.23199999332427979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,fp8,0,0.2320853273073832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,0,1,fp8,fp8,0,0.21167999505996704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,float16,0,0.11179199814796448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,fp8,0,0.11143466830253601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,float16,0,0.23340266942977905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,128,1,fp8,fp8,0,0.10831999778747559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,fp8,0,0.23404266436894736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,0,1,fp8,fp8,0,0.21612266699473062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,float16,0,0.0881119966506958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,float16,0,0.16269866625467935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,fp8,0,0.08861333131790161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,fp8,0,0.11152533690134685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,fp8,0,0.1625226636727651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,0,1,fp8,fp8,0,0.15049599607785544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,float16,0,0.08852266271909077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,float16,0,0.16379200418790182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,fp8,0,0.08894933263460796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,128,1,fp8,fp8,0,0.08456533153851827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,fp8,0,0.16316800316174826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,0,1,fp8,fp8,0,0.15051733454068503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,float16,0,0.08879466851552327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,float16,0,0.16314666469891867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,fp8,0,0.08891200025876363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,128,1,fp8,fp8,0,0.08281066517035167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,fp8,0,0.16251200437545776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,128,1,fp8,fp8,0,0.0825973351796468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,float16,0,0.08892800410588582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,float16,0,0.16359466314315796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,fp8,0,0.08896533648173015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,128,1,fp8,fp8,0,0.08296533425649007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,fp8,0,0.16244266430536905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,0,1,fp8,fp8,0,0.15034133195877075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,float16,0,0.08830933769543965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,float16,0,0.16427200039227804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,fp8,0,0.08823466300964355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,128,1,fp8,fp8,0,0.08461333314577739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,0,1,fp8,fp8,0,0.14910399913787842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,fp8,0,0.11346667011578877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,fp8,0,0.16293332974116007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,float16,0,0.9668746789296468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,float16,0,1.7996907234191895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,fp8,0,0.974064032236735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,128,1,fp8,fp8,0,0.8554933071136475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,fp8,0,1.8098133405049641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,0,1,fp8,fp8,0,1.6065173149108887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,float16,0,0.9798080126444498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,fp8,0,0.986128012339274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,0,1,fp8,fp8,0,0.15041066209475198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,float16,0,1.8101226488749187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,128,1,fp8,fp8,0,0.870965321858724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,float16,0,1.0062026977539062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,fp8,0,1.8171040217081706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,float16,0,1.8176320393880208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,fp8,0,0.995253324508667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,fp8,0,1.827797253926595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,0,1,fp8,fp8,0,1.631498654683431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,0,1,fp8,fp8,0,1.6198399861653645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,float16,0,1.0132959683736165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,128,1,fp8,fp8,0,0.8978933493296305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,fp8,0,1.019221305847168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,float16,0,1.847333272298177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,128,1,fp8,fp8,0,0.9124320348103842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,float16,0,0.5635786851247152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,fp8,0,1.8521226247151692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,fp8,0,0.5788053274154663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,128,1,fp8,fp8,0,0.5353120168050131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,fp8,0,1.0024693012237549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,0,1,fp8,fp8,0,0.9041120211283366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,float16,0,0.49505066871643066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,float16,0,0.9214080174763998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,128,1,fp8,fp8,0,0.4409866730372111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,fp8,0,0.9228746891021729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,0,1,fp8,fp8,0,0.8266879717508951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,float16,0,0.9924906889597574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,float16,0,0.5017600059509277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,float16,0,0.9241387049357096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,fp8,0,0.5183519919713339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,128,1,fp8,fp8,0,0.4466506640116374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,fp8,0,0.9279680252075195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,0,1,fp8,fp8,0,0.8319040139516195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,0,1,fp8,fp8,0,1.664031982421875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,fp8,0,0.5091840028762817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,128,1,fp8,fp8,0,0.45395199457804364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,fp8,0,0.9355359872182211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,fp8,0,0.49853865305582684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,0,1,fp8,fp8,0,0.8374826908111572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,float16,0,0.5154879887898763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,fp8,0,0.5206079880396525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,float16,0,0.9419840176900228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,128,1,fp8,fp8,0,0.47886399428049725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,fp8,0,0.9495573043823242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,float16,0,0.5041280190149943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,0,1,fp8,fp8,0,0.8488000233968099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,float16,0,0.5193119843800863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,128,1,fp8,fp8,0,0.2754666606585185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,fp8,0,0.5254613161087036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,0,1,fp8,fp8,0,0.4782133499781291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,float16,0,0.29799999793370563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,float16,0,0.9300053119659424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,float16,0,0.48006399472554523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,fp8,0,0.261845330397288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,128,1,fp8,fp8,0,0.23830399910608926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,fp8,0,0.4830133517583211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,0,1,fp8,fp8,0,0.44363200664520264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,float16,0,0.2617280085881551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,float16,0,0.4813013474146525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,float16,0,0.25996800263722736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,fp8,0,0.3028906583786011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,fp8,0,0.48577598730723065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,0,1,fp8,fp8,0,0.4392053286234538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,float16,0,0.26636266708374023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,float16,0,0.4870400031407674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,128,1,fp8,fp8,0,0.24411733945210776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,fp8,0,0.2630880077679952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,fp8,0,0.49089598655700684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,0,1,fp8,fp8,0,0.4440213441848755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,float16,0,0.2736213405927022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,float16,0,0.4946719805399577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,128,1,fp8,fp8,0,0.24904000759124756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,fp8,0,0.2686026692390442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,fp8,0,0.5012373526891073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,0,1,fp8,fp8,0,0.44948267936706543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,float16,0,0.16265599926312765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,128,1,fp8,fp8,0,0.2405866583188375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,fp8,0,0.16485866904258728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,128,1,fp8,fp8,0,0.15435733397801718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,fp8,0,0.28599466880162555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,0,1,fp8,fp8,0,0.26412800947825116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,float16,0,0.1383573313554128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,float16,0,0.2600959936777751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,fp8,0,0.1400159994761149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,128,1,fp8,fp8,0,0.1272213359673818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,fp8,0,0.2616106669108073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,0,1,fp8,fp8,0,0.2437280019124349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,float16,0,0.1402506629625956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,float16,0,0.26022400458653766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,128,1,fp8,fp8,0,0.12989866733551025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,fp8,0,0.2624800006548564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,float16,0,0.2831146717071533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,0,1,fp8,fp8,0,0.23848533630371094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,fp8,0,0.2773333390553792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,float16,0,0.26284799973169964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,fp8,0,0.14436800281206766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,128,1,fp8,fp8,0,0.13542399803797403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,fp8,0,0.14014400045077005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,fp8,0,0.26609599590301514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,0,1,fp8,fp8,0,0.24332267045974731
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,float16,0,0.1463520030180613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,float16,0,0.2677653431892395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,128,1,fp8,fp8,0,0.14018666744232178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,fp8,0,0.2722240090370178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,0,1,fp8,fp8,0,0.25058666865030926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,float16,0,0.0918933351834615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,float16,0,0.1630293329556783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,fp8,0,0.09363733728726704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,128,1,fp8,fp8,0,0.09148266911506653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,fp8,0,0.16683199008305868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,0,1,fp8,fp8,0,0.15635733803113303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,float16,0,0.08524266878763835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,float16,0,0.1563093364238739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,fp8,0,0.08525866270065308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,128,1,fp8,fp8,0,0.07670400043328603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,fp8,0,0.1567093332608541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,0,1,fp8,fp8,0,0.14203199744224548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,float16,0,0.08505066235860188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,float16,0,0.15667200088500977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,fp8,0,0.08456533153851827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,128,1,fp8,fp8,0,0.07668800155321757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,fp8,0,0.1575040022532145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,0,1,fp8,fp8,0,0.14201600352923074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,float16,0,0.08506133159001668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,float16,0,0.15651733676592508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,fp8,0,0.08550399541854858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,128,1,fp8,fp8,0,0.07839466631412506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,fp8,0,0.15834133823712668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,0,1,fp8,fp8,0,0.14422399799029031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,float16,0,0.1426346699396769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,float16,0,0.158053328593572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,fp8,0,0.08593066533406575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,128,1,fp8,fp8,0,0.08081066608428955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,fp8,0,0.1583626667658488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,0,1,fp8,fp8,0,0.1442453364531199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,float16,0,0.0683840016523997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,float16,0,0.11710932850837708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,fp8,0,0.06844266752401988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,128,1,fp8,fp8,0,0.06597866614659627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,fp8,0,0.1170186698436737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,0,1,fp8,fp8,0,0.10797333717346191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,float16,0,0.11761599779129028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,fp8,0,0.14851199587186178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,fp8,0,0.06832000116507213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,128,1,fp8,fp8,0,0.06452266871929169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,fp8,0,0.11633066336313884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,0,1,fp8,fp8,0,0.10749333103497823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,float16,0,0.0681386689345042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,float16,0,0.11752532919247945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,128,1,fp8,fp8,0,0.06621333460013072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,fp8,0,0.11742400129636128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,0,1,fp8,fp8,0,0.10844799876213074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,float16,0,0.06837333242098491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,float16,0,0.08516266942024231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,fp8,0,0.06811733543872833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,128,1,fp8,fp8,0,0.06630933284759521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,fp8,0,0.11767466862996419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,0,1,fp8,fp8,0,0.10763200124104817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,float16,0,0.06835733354091644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,float16,0,0.06857599814732869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,fp8,0,0.06855466465155284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,float16,0,0.11585600177447002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,fp8,0,0.11581333478291829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,0,1,fp8,fp8,0,0.10729066530863444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,fp8,0,0.06826133529345195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,float16,0,1.3134933312733967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,128,1,fp8,fp8,0,0.06390933195749919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,float16,0,1.961850643157959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,fp8,0,1.311520020167033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,128,1,fp8,fp8,0,1.1450080076853435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,fp8,0,1.9626399676005046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,float16,0,0.11586667100588481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,0,1,fp8,fp8,0,1.7283627192179363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,float16,0,1.336400032043457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,fp8,0,1.3384265899658203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,128,1,fp8,fp8,0,1.159600019454956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,float16,0,1.9931626319885254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,fp8,0,1.98526398340861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,float16,0,1.3629973729451497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,0,1,fp8,fp8,0,1.7462293306986492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,float16,0,2.0162347157796225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,fp8,0,1.3761119842529297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,128,1,fp8,fp8,0,1.1722559928894043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,fp8,0,2.003386656443278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,float16,0,1.3910133043924968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,float16,0,2.041920026143392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,fp8,0,1.3833759625752766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,fp8,0,2.0375307401021323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,0,1,fp8,fp8,0,1.8064640363057454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,float16,0,0.7493813037872314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,128,1,fp8,fp8,0,1.216970682144165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,fp8,0,0.7521920204162598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,128,1,fp8,fp8,0,0.6847519874572754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,fp8,0,1.0863306522369385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,0,1,fp8,fp8,0,1.0062719980875652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,0,1,fp8,fp8,0,1.760170618693034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,float16,0,0.6517279942830404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,float16,0,0.9814666906992594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,float16,0,1.0845706462860107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,fp8,0,0.6559679905573527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,128,1,fp8,fp8,0,0.5762613217035929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,fp8,0,0.9799199899037679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,float16,0,0.9885066350301107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,fp8,0,0.6634133259455363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,128,1,fp8,fp8,0,0.584826668103536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,fp8,0,0.9939306577046713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,0,1,fp8,fp8,0,0.8821866512298584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,float16,0,0.6796693007151285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,float16,0,0.9952906767527262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,128,1,fp8,fp8,0,0.6081120173136393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,fp8,0,0.9980959892272949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,float16,0,0.6582560141881307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,fp8,0,0.669157346089681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,float16,0,0.6824853420257568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,fp8,0,0.6887839635213217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,128,1,fp8,fp8,0,0.6124533414840698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,0,1,fp8,fp8,0,0.8902719815572103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,0,1,fp8,fp8,0,0.9130240281422933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,fp8,0,1.019759972890218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,float16,0,0.38356268405914307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,float16,0,0.5551573435465494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,float16,0,1.0129813353220622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,fp8,0,0.3887360095977783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,128,1,fp8,fp8,0,0.3535360097885132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,fp8,0,0.5582613150278727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,0,1,fp8,fp8,0,0.5094453493754069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,float16,0,0.3336319923400879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,float16,0,0.505184014638265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,128,1,fp8,fp8,0,0.30103466908137005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,0,1,fp8,fp8,0,0.908122698465983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,fp8,0,0.5085493326187134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,float16,0,0.3371359904607137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,float16,0,0.5088853438695272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,fp8,0,0.34080533186594647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,128,1,fp8,fp8,0,0.3063253362973531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,fp8,0,0.5109279950459799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,0,1,fp8,fp8,0,0.45975999037424725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,fp8,0,0.3384639819463094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,float16,0,0.3425973256429036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,float16,0,0.5174186627070109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,fp8,0,0.3476853370666504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,0,1,fp8,fp8,0,0.455946683883667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,fp8,0,0.5180746714274088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,float16,0,0.35285866260528564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,float16,0,0.5234933296839396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,128,1,fp8,fp8,0,0.3200426697731018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,fp8,0,0.5272479852040609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,128,1,fp8,fp8,0,0.30875200033187866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,0,1,fp8,fp8,0,0.48073601722717285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,float16,0,0.20359466473261514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,0,1,fp8,fp8,0,0.46409066518147785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,float16,0,0.2959360082944234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,128,1,fp8,fp8,0,0.19126399358113608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,fp8,0,0.3562026818593343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,0,1,fp8,fp8,0,0.2733706633249919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,float16,0,0.174618661403656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,float16,0,0.2651946743329366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,128,1,fp8,fp8,0,0.16244799892107645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,fp8,0,0.26734934250513714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,0,1,fp8,fp8,0,0.24620266755421957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,fp8,0,0.20800000429153442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,fp8,0,0.29928000768025714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,fp8,0,0.17916800578435263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,fp8,0,0.17525333166122437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,fp8,0,0.26740266879399616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,0,1,fp8,fp8,0,0.24698134263356528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,float16,0,0.1803413430849711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,float16,0,0.1771786610285441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,float16,0,0.2698720097541809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,float16,0,0.26741333802541095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,128,1,fp8,fp8,0,0.16690133015314737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,fp8,0,0.2722773353258769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,0,1,fp8,fp8,0,0.24925333261489868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,float16,0,0.18743467330932617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,float16,0,0.2781173388163249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,fp8,0,0.18954133987426758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,128,1,fp8,fp8,0,0.17377066612243652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,fp8,0,0.27906666199366253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,0,1,fp8,fp8,0,0.256933331489563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,float16,0,0.11340799927711487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,float16,0,0.1649066706498464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,fp8,0,0.1830986738204956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,fp8,0,0.16672533750534058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,128,1,fp8,fp8,0,0.16492266456286112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,float16,0,0.10019733508427937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,float16,0,0.1495733360449473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,fp8,0,0.1002400020758311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,128,1,fp8,fp8,0,0.08903466661771138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,fp8,0,0.11541866262753804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,128,1,fp8,fp8,0,0.10939733187357585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,float16,0,0.09929600358009338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,fp8,0,0.15198933084805807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,float16,0,0.15152000387509665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,0,1,fp8,fp8,0,0.1349440018335978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,fp8,0,0.10170666376749675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,0,1,fp8,fp8,0,0.15531200170516968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,fp8,0,0.1525173286596934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,0,1,fp8,fp8,0,0.13620799779891968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,float16,0,0.10140800476074219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,float16,0,0.15074666341145834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,fp8,0,0.10143466790517171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,128,1,fp8,fp8,0,0.09101866682370503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,fp8,0,0.15331733226776123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,0,1,fp8,fp8,0,0.1378506620724996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,float16,0,0.10181333621342976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,float16,0,0.15317866206169128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,fp8,0,0.10378133257230122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,128,1,fp8,fp8,0,0.09557867050170898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,fp8,0,0.155648003021876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,0,1,fp8,fp8,0,0.1422826647758484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,128,1,fp8,fp8,0,0.09085866808891296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,float16,0,0.09934932986895244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,fp8,0,0.06901866694291432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,fp8,0,0.10125866532325745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,0,1,fp8,fp8,0,0.09344533085823059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,float16,0,0.0640533318122228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,float16,0,0.09514666597048442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,fp8,0,0.06284800171852112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,128,1,fp8,fp8,0,0.05851200222969055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,fp8,0,0.09512533744176228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,0,1,fp8,fp8,0,0.08699733018875122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,float16,0,0.06234133243560791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,float16,0,0.0953386624654134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,fp8,0,0.062447999914487205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,128,1,fp8,fp8,0,0.05831466615200043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,fp8,0,0.09671466549237569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,0,1,fp8,fp8,0,0.08724799752235413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,float16,0,0.06426666676998138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,float16,0,0.09642666578292847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,fp8,0,0.06326933205127716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,128,1,fp8,fp8,0,0.060090666015942894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,fp8,0,0.0962506632010142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,0,1,fp8,fp8,0,0.08683199683825175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,float16,0,0.06435733536879222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,float16,0,0.09573866923650105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,fp8,0,0.06439466774463654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,128,1,fp8,fp8,0,0.059877331058184304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,fp8,0,0.09735999504725139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,0,1,fp8,fp8,0,0.08849066495895386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,float16,0,0.05393599967161814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,float16,0,0.07483200232187907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,fp8,0,0.05392000079154968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,128,1,fp8,fp8,0,0.05207466582457224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,fp8,0,0.07577066620190938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,0,1,fp8,fp8,0,0.07041599849859874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,float16,0,0.05484800040721893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,float16,0,0.07660266757011414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,fp8,0,0.054842665791511536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,128,1,fp8,fp8,0,0.05217599868774414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,fp8,0,0.07467199862003326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,0,1,fp8,fp8,0,0.06856533388296764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,float16,0,0.05378133555253347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,float16,0,0.0743999977906545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,fp8,0,0.05429866909980774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,128,1,fp8,fp8,0,0.052144000927607216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,fp8,0,0.07559466858704884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,0,1,fp8,fp8,0,0.06952000161012013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,float16,0,0.05516799787680308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,float16,0,0.07481599847475688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,fp8,0,0.05553600192070007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,128,1,fp8,fp8,0,0.05008000135421753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,fp8,0,0.07537066439787547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,float16,0,0.06715199848016103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,float16,0,0.05421866476535797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,float16,0,0.07568533221880595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,fp8,0,0.05426666637261709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,128,1,fp8,fp8,0,0.051514665285746254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,fp8,0,0.07649066547552745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,0,1,fp8,fp8,0,0.07044266661008199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,float16,0,0.964458703994751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,float16,0,1.2821333408355713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,0,1,fp8,fp8,0,0.06844800213972728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,fp8,0,0.9681066672007242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,128,1,fp8,fp8,0,0.06461866696675618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,128,1,fp8,fp8,0,0.8490400314331055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,fp8,0,1.2866453329722087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,float16,0,0.9780533313751221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,float16,0,1.3009973367055256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,fp8,0,0.9817333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,fp8,0,1.3036106427510579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,0,1,fp8,fp8,0,1.1481760342915852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,0,1,fp8,fp8,0,1.134991963704427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,float16,0,1.313429355621338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,128,1,fp8,fp8,0,0.8600959777832031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,128,1,fp8,fp8,0,0.8711786270141602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,fp8,0,1.3127466837565105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,float16,0,0.9922613302866617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,0,1,fp8,fp8,0,1.1580959955851238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,float16,0,1.0129120349884033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,fp8,0,0.9925013383229574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,fp8,0,1.0148693720499675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,128,1,fp8,fp8,0,0.8960533142089844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,fp8,0,1.3370985984802246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,0,1,fp8,fp8,0,1.183578650156657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,float16,0,0.5606880187988281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,float16,0,0.7276373704274496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,float16,0,1.334112008412679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,128,1,fp8,fp8,0,0.532693346341451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,fp8,0,0.7354026635487875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,0,1,fp8,fp8,0,0.665450652440389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,float16,0,0.4870826800664266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,float16,0,0.6522719860076904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,fp8,0,0.49571200211842853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,fp8,0,0.5677226781845093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,128,1,fp8,fp8,0,0.43404801686604816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,fp8,0,0.6564480066299438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,0,1,fp8,fp8,0,0.582698663075765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,float16,0,0.6577920118967692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,fp8,0,0.499290664990743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,128,1,fp8,fp8,0,0.44190935293833417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,fp8,0,0.6614346504211426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,0,1,fp8,fp8,0,0.5892693201700846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,float16,0,0.49945068359375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,float16,0,0.6642080148061117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,fp8,0,0.6997919877370199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,float16,0,0.494917352994283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,0,1,fp8,fp8,0,0.5934986670811971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,float16,0,0.5114080111185709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,float16,0,0.6901386578877767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,fp8,0,0.5041226545969645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,128,1,fp8,fp8,0,0.4578506549199422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,fp8,0,0.6811839739481608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,float16,0,0.2917226751645406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,fp8,0,0.5160746574401855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,float16,0,0.3782613277435303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,128,1,fp8,fp8,0,0.4471786816914876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,fp8,0,0.2958773374557495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,128,1,fp8,fp8,0,0.26871466636657715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,fp8,0,0.38420267899831134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,0,1,fp8,fp8,0,0.3495039939880371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,float16,0,0.2583413322766622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,float16,0,0.3373546600341797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,128,1,fp8,fp8,0,0.23033066590627035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,fp8,0,0.34016533692677814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,0,1,fp8,fp8,0,0.30798933903376263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,float16,0,0.25296000639597577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,float16,0,0.33938666184743244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,fp8,0,0.2558133403460185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,128,1,fp8,fp8,0,0.2327359914779663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,fp8,0,0.34163200855255127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,0,1,fp8,fp8,0,0.3105226755142212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,float16,0,0.25880000988642377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,float16,0,0.3440693219502767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,fp8,0,0.2550453344980876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,fp8,0,0.26206932465235394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,128,1,fp8,fp8,0,0.23635733127593994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,fp8,0,0.3471999963124593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,float16,0,0.2659626603126526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,float16,0,0.3516639868418376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,fp8,0,0.28360533714294434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,128,1,fp8,fp8,0,0.24225600560506186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,fp8,0,0.3567306598027547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,0,1,fp8,fp8,0,0.32074133555094403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,float16,0,0.20493332544962564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,fp8,0,0.15958933035532633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,128,1,fp8,fp8,0,0.14863999684651694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,fp8,0,0.20805867513020834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,0,1,fp8,fp8,0,0.19075200955073038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,float16,0,0.13154666622479758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,float16,0,0.1567573348681132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,fp8,0,0.13292800386746725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,128,1,fp8,fp8,0,0.12178666392962138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,fp8,0,0.18224000930786133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,0,1,fp8,fp8,0,0.3149919907251994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,0,1,fp8,fp8,0,0.16510933637619019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,float16,0,0.1318933367729187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,float16,0,0.18016533056894937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,fp8,0,0.1339306632677714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,128,1,fp8,fp8,0,0.12382400035858154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,fp8,0,0.18187199036280313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,0,1,fp8,fp8,0,0.16590399543444315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,float16,0,0.1341546674569448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,float16,0,0.18189332882563272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,fp8,0,0.1364479959011078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,128,1,fp8,fp8,0,0.12723199526468912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,fp8,0,0.18407466014226279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,0,1,fp8,fp8,0,0.17008533080418906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,float16,0,0.1402186652024587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,float16,0,0.18816532691319784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,fp8,0,0.1423466702302297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,128,1,fp8,fp8,0,0.13411200046539307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,fp8,0,0.188917338848114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,0,1,fp8,fp8,0,0.1770346760749817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,float16,0,0.11351999640464783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,float16,0,0.17782400051752725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,128,1,fp8,fp8,0,0.08468266328175862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,fp8,0,0.11564266681671143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,0,1,fp8,fp8,0,0.11024533708890279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,float16,0,0.07675733168919881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,float16,0,0.10434133807818095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,fp8,0,0.07854400078455608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,float16,0,0.0844586690266927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,128,1,fp8,fp8,0,0.07044800122578938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,fp8,0,0.08732799688975017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,0,1,fp8,fp8,0,0.09538132945696513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,float16,0,0.07766399780909221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,float16,0,0.10513599713643391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,fp8,0,0.07888533174991608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,128,1,fp8,fp8,0,0.0705866664648056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,fp8,0,0.10586667060852051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,0,1,fp8,fp8,0,0.09607999523480733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,float16,0,0.07878933350245158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,float16,0,0.10651733477910359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,fp8,0,0.07857066889603932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,128,1,fp8,fp8,0,0.0724533349275589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,fp8,0,0.10708799958229065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,0,1,fp8,fp8,0,0.09701333443323772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,float16,0,0.07868800063927968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,float16,0,0.10842133561770122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,128,1,fp8,fp8,0,0.075013334552447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,fp8,0,0.10954667131106059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,0,1,fp8,fp8,0,0.6081173419952393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,float16,0,0.052202666799227394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,float16,0,0.07288533449172974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,fp8,0,0.05406933526198069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,128,1,fp8,fp8,0,0.049957334995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,fp8,0,0.08076266447703044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,0,1,fp8,fp8,0,0.0693333347638448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,float16,0,0.049866666396458946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,float16,0,0.07025066514809926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,0,1,fp8,fp8,0,0.0993333359559377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,fp8,0,0.050288001696268715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,128,1,fp8,fp8,0,0.04647466540336609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,fp8,0,0.07083199918270111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,0,1,fp8,fp8,0,0.06446933249632518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,float16,0,0.050293331344922386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,float16,0,0.07016000151634216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,128,1,fp8,fp8,0,0.048154667019844055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,fp8,0,0.0732586681842804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,0,1,fp8,fp8,0,0.06443733473618825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,float16,0,0.04958933095137278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,float16,0,0.07049066821734111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,fp8,0,0.05008533100287119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,128,1,fp8,fp8,0,0.0472320020198822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,fp8,0,0.07062933345635732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,0,1,fp8,fp8,0,0.06454933186372121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,float16,0,0.04997866849104563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,fp8,0,0.07089599967002869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,fp8,0,0.10598933696746826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,128,1,fp8,fp8,0,0.04799999793370565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,fp8,0,0.072202667593956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,0,1,fp8,fp8,0,0.0649599979321162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,float16,0,0.04139200101296107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,float16,0,0.05188799897829691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,fp8,0,0.0507893313964208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,128,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,float16,0,0.0705386648575465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,fp8,0,0.05233600238958994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,float16,0,0.04174399872620901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,float16,0,0.051967998345692955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,fp8,0,0.040448000033696495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,128,1,fp8,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,fp8,0,0.054144000013669334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,float16,0,0.04189866781234741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,fp8,0,0.05305600166320801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,0,1,fp8,fp8,0,0.049925332268079124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,128,1,fp8,fp8,0,0.03933866570393244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,fp8,0,0.05212800204753876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,0,1,fp8,fp8,0,0.049733335773150124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,float16,0,0.039808000127474465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,float16,0,0.05377600093682607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,float16,0,0.053957333167394005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,128,1,fp8,fp8,0,0.03995199998219808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,fp8,0,0.05383466680844625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,0,1,fp8,fp8,0,0.04975466430187225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,float16,0,0.039546666045983635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,float16,0,0.05395199855168661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,fp8,0,0.041562666495641075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,0,1,fp8,fp8,0,0.05009066561857859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,fp8,0,0.05218133330345154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,fp8,0,0.04165866722663244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,float16,0,1.1359360218048096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,float16,0,1.3453332583109539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,128,1,fp8,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,fp8,0,1.1341013113657634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,0,1,fp8,fp8,0,0.04997866849104563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,128,1,fp8,fp8,0,1.018448034922282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,fp8,0,1.3421972592671711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,0,1,fp8,fp8,0,1.2008319695790608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,float16,0,1.1470452944437664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,float16,0,1.3569653828938801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,fp8,0,1.1415253480275471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,128,1,fp8,fp8,0,1.0645493666330974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,fp8,0,1.3528265953063965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,0,1,fp8,fp8,0,1.2439253330230713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,float16,0,1.1539519627888997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,float16,0,1.3605759938557942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,fp8,0,1.1514026323954265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,128,1,fp8,fp8,0,1.086400032043457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,fp8,0,1.3601172765096028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,0,1,fp8,fp8,0,1.276527961095174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,float16,0,1.1892906824747722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,float16,0,1.3805386225382488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,fp8,0,1.1740799744923909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,128,1,fp8,fp8,0,1.1197493076324463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,float16,0,0.6313759883244833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,fp8,0,1.373978614807129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,float16,0,0.7424960136413574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,fp8,0,0.6199093262354533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,128,1,fp8,fp8,0,0.5954826672871908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,fp8,0,0.7311253547668457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,0,1,fp8,fp8,0,0.6925280094146729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,float16,0,0.5789546569188436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,0,1,fp8,fp8,0,1.3022666772206624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,float16,0,0.6844053268432617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,128,1,fp8,fp8,0,0.5286773443222046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,fp8,0,0.6829866568247477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,0,1,fp8,fp8,0,0.614138682683309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,float16,0,0.6888480186462402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,fp8,0,0.6050560077031454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,128,1,fp8,fp8,0,0.5323306719462076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,fp8,0,0.6876693566640218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,fp8,0,0.5800000031789144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,0,1,fp8,fp8,0,0.624944011370341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,float16,0,0.6039679845174154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,float16,0,0.6944693724314371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,fp8,0,0.5861813227335612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,128,1,fp8,fp8,0,0.5385973453521729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,fp8,0,0.6915253003438314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,0,1,fp8,fp8,0,0.630677342414856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,float16,0,0.5958880186080933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,float16,0,0.703285296758016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,128,1,fp8,fp8,0,0.558512012163798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,fp8,0,0.7009119987487793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,float16,0,0.583461324373881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,0,1,fp8,fp8,0,0.6503733396530151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,float16,0,0.32992533842722577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,float16,0,0.38554131984710693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,fp8,0,0.32289065917332965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,128,1,fp8,fp8,0,0.30926400423049927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,fp8,0,0.37858664989471436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,0,1,fp8,fp8,0,0.35996798674265545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,float16,0,0.3014400005340576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,float16,0,0.3555306593577067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,fp8,0,0.3001599907875061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,fp8,0,0.35585065682729083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,0,1,fp8,fp8,0,0.31918932994206745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,float16,0,0.3004266619682312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,float16,0,0.35708800951639813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,fp8,0,0.29941866795221966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,128,1,fp8,fp8,0,0.27717334032058716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,fp8,0,0.35419201850891113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,0,1,fp8,fp8,0,0.32449599107106525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,float16,0,0.3025546669960022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,float16,0,0.3601919809977214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,fp8,0,0.3043573300043742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,128,1,fp8,fp8,0,0.2797813415527344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,fp8,0,0.3593333164850871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,0,1,fp8,fp8,0,0.3280106584231059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,float16,0,0.30962133407592773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,float16,0,0.3662986755371094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,fp8,0,0.3078560034434001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,128,1,fp8,fp8,0,0.2879093289375305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,fp8,0,0.6198666493097941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,0,1,fp8,fp8,0,0.33667198816935223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,float16,0,0.1739413340886434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,float16,0,0.20449066162109375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,fp8,0,0.17076265811920166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,128,1,fp8,fp8,0,0.16498667001724243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,fp8,0,0.20142932732899985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,0,1,fp8,fp8,0,0.1976906657218933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,float16,0,0.15847999850908914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,float16,0,0.18784532944361368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,fp8,0,0.1586240033308665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,128,1,fp8,fp8,0,0.1458186705907186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,fp8,0,0.187717338403066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,0,1,fp8,fp8,0,0.17221333583196005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,128,1,fp8,fp8,0,0.26934399207433063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,float16,0,0.18902933597564697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,fp8,0,0.15828266739845276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,128,1,fp8,fp8,0,0.1469013293584188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,fp8,0,0.18771199385325113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,0,1,fp8,fp8,0,0.1729653278986613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,float16,0,0.1606826682885488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,float16,0,0.18955200910568237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,fp8,0,0.16190399726231894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,float16,0,0.15850667158762613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,fp8,0,0.1906773249308268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,0,1,fp8,fp8,0,0.17622933785120645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,float16,0,0.1635040044784546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,float16,0,0.19286400079727173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,fp8,0,0.16430399815241495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,128,1,fp8,fp8,0,0.15432533621788025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,fp8,0,0.19263466199239096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,0,1,fp8,fp8,0,0.18110400438308716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,float16,0,0.09628267089525859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,float16,0,0.11417599519093831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,fp8,0,0.09553066889444987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,128,1,fp8,fp8,0,0.09423466523488362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,fp8,0,0.11328533291816711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,0,1,fp8,fp8,0,0.10971200466156006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,float16,0,0.0881813367207845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,float16,0,0.10525866349538167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,fp8,0,0.08821866909662883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,128,1,fp8,fp8,0,0.08088000118732452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,fp8,0,0.10633066296577454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,0,1,fp8,fp8,0,0.09524800380071004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,128,1,fp8,fp8,0,0.15029866496721903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,float16,0,0.0890880028406779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,float16,0,0.10738666852315266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,fp8,0,0.08809600273768108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,128,1,fp8,fp8,0,0.08090666433175404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,fp8,0,0.10589333375295003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,0,1,fp8,fp8,0,0.09586133559544881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,float16,0,0.08913600444793701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,float16,0,0.10624000430107117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,fp8,0,0.08850666880607605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,128,1,fp8,fp8,0,0.08203200002511342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,fp8,0,0.1066986620426178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,0,1,fp8,fp8,0,0.09705600142478943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,float16,0,0.08979733784993489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,float16,0,0.1083573301633199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,fp8,0,0.0890880028406779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,128,1,fp8,fp8,0,0.0846720039844513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,fp8,0,0.10642666618029277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,0,1,fp8,fp8,0,0.0993333359559377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,float16,0,0.055530667304992676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,fp8,0,0.05409066875775655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,128,1,fp8,fp8,0,0.05499200026194254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,0,1,fp8,fp8,0,0.06544533371925354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,float16,0,0.0545066644748052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,float16,0,0.06550399959087372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,fp8,0,0.05446400245030721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,fp8,0,0.36265599727630615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,128,1,fp8,fp8,0,0.050661335388819374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,fp8,0,0.06372266511122386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,0,1,fp8,fp8,0,0.05898133416970571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,float16,0,0.051813334226608276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,float16,0,0.06474666794141133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,fp8,0,0.05272000034650167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,128,1,fp8,fp8,0,0.0498879998922348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,fp8,0,0.06471466521422069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,0,1,fp8,fp8,0,0.06048533320426941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,float16,0,0.054154664278030396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,float16,0,0.06599999964237213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,128,1,fp8,fp8,0,0.04991999765237173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,fp8,0,0.06473599870999654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,0,1,fp8,fp8,0,0.06035733222961426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,float16,0,0.05468266705671946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,float16,0,0.06659199794133504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,fp8,0,0.06612800061702728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,fp8,0,0.054234668612480164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,128,1,fp8,fp8,0,0.05012266834576925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,fp8,0,0.06487466891606648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,0,1,fp8,fp8,0,0.0606826643149058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,float16,0,0.0377866675456365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,fp8,0,0.05349333087603251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,128,1,fp8,fp8,0,0.035546667873859406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,fp8,0,0.044293334086736046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,0,1,fp8,fp8,0,0.04429866870244344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,float16,0,0.03752533346414566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,float16,0,0.04402133325735728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,fp8,0,0.039162665605545044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,128,1,fp8,fp8,0,0.03573333223660787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,fp8,0,0.0439573327700297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,0,1,fp8,fp8,0,0.041589332123597465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,float16,0,0.038047999143600464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,float16,0,0.04320533573627472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,fp8,0,0.037050666908423104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,128,1,fp8,fp8,0,0.033402666449546814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,fp8,0,0.043882668018341064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,0,1,fp8,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,float16,0,0.0378560001651446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,float16,0,0.043824002146720886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,128,1,fp8,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,fp8,0,0.04381866753101349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,0,1,fp8,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,float16,0,0.03787733366092046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,float16,0,0.045824001232783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,fp8,0,0.03781333317359289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,128,1,fp8,fp8,0,0.03545066714286804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,fp8,0,0.04457599918047587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,0,1,fp8,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,float16,0,0.03325333446264267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,128,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,fp8,0,0.033701332906881966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,0,1,fp8,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,float16,0,0.04595733185609182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,fp8,0,0.02754666656255722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,128,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,fp8,0,0.03362133353948593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,float16,0,0.06714666883150737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,float16,0,0.033520000676314034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,float16,0,0.027514666318893433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,fp8,0,0.032229334115982056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,0,1,fp8,fp8,0,0.03179733455181122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,0,1,fp8,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,float16,0,0.03383466601371765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,fp8,0,0.033285332222779594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,float16,0,0.033615998923778534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,float16,0,0.026565333207448322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,fp8,0,0.03391999999682108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,0,1,fp8,fp8,0,0.03201599915822347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,0,1,fp8,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,128,1,float16,float16,0,1.0978079636891682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,0,1,float16,float16,0,1.1173760096232097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,128,1,fp8,fp8,0,0.9864746729532877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,0,1,float16,fp8,0,1.117578665415446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,0,1,fp8,fp8,0,0.9977013270060221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,128,1,float16,fp8,0,1.1106239954630535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,128,1,fp8,fp8,0,1.0309279759724934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,128,1,float16,float16,0,1.1139039993286133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,0,1,float16,fp8,0,1.129541317621867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,0,1,fp8,fp8,0,1.0550453662872314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,128,1,float16,float16,0,1.1216959953308105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,0,1,float16,float16,0,1.138522704442342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,128,1,float16,fp8,0,1.1216959953308105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,128,1,fp8,fp8,0,1.065167983373006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,0,1,float16,fp8,0,1.1398186683654785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,0,1,fp8,fp8,0,1.074730634689331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,0,1,float16,float16,0,1.1292906602223713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,128,1,float16,float16,0,1.1543200016021729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,0,1,float16,float16,0,1.1636959711710613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,128,1,float16,fp8,0,1.1422719955444336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,128,1,fp8,fp8,0,1.092570702234904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,128,1,float16,fp8,0,1.0976373354593914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,128,1,float16,float16,0,0.6171360015869141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,0,1,float16,float16,0,0.6297813256581625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,128,1,float16,fp8,0,0.6038399934768677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,128,1,fp8,fp8,0,0.5791253248850504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,0,1,float16,fp8,0,0.6173333326975504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,0,1,fp8,fp8,0,0.5880533456802368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,0,1,float16,fp8,0,1.1646560033162434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,128,1,float16,float16,0,0.5594613154729208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,0,1,float16,float16,0,0.5686879952748617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,128,1,float16,fp8,0,0.559829314549764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,128,1,fp8,fp8,0,0.5053439935048422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,0,1,float16,fp8,0,0.5693653424580892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,0,1,fp8,fp8,0,0.5102293491363525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,128,1,float16,float16,0,0.5672213236490885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,0,1,float16,float16,0,0.5739680131276449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,128,1,float16,fp8,0,0.5656746625900269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,128,1,fp8,fp8,0,0.5173493226369222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,0,1,float16,fp8,0,0.5963360071182251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,0,1,fp8,fp8,0,0.5199253161748251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,128,1,float16,float16,0,0.5703146855036417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,0,1,float16,float16,0,0.5803733269373575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,128,1,float16,fp8,0,0.5683733224868774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,128,1,fp8,fp8,0,0.5256213347117106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,0,1,float16,fp8,0,0.5793066819508871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,128,1,float16,float16,0,0.575823982556661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,0,1,float16,float16,0,0.5892159938812256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,128,1,float16,fp8,0,0.5757333437601725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,128,1,fp8,fp8,0,0.564250667889913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,0,1,float16,fp8,0,0.5859253406524658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,0,1,fp8,fp8,0,0.5462719996770223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,128,1,float16,float16,0,0.3198666572570801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,0,1,fp8,fp8,0,0.526469349861145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,128,1,float16,fp8,0,0.3144213358561198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,128,1,fp8,fp8,0,0.30137066046396893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,0,1,fp8,fp8,0,1.104325294494629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,0,1,fp8,fp8,0,0.3060693343480428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,128,1,float16,float16,0,0.2901866634686788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,0,1,float16,float16,0,0.2961919903755188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,128,1,float16,fp8,0,0.29038933912913006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,128,1,fp8,fp8,0,0.26243199904759723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,0,1,float16,fp8,0,0.2951040069262187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,0,1,fp8,fp8,0,0.26693334182103473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,0,1,float16,float16,0,0.32798399527867633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,128,1,float16,float16,0,0.29100267092386883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,0,1,float16,float16,0,0.29732799530029297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,128,1,float16,fp8,0,0.29233600695927936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,0,1,float16,fp8,0,0.3209013342857361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,0,1,float16,fp8,0,0.29632000128428143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,0,1,fp8,fp8,0,0.2715680003166199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,128,1,float16,float16,0,0.29500800371170044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,0,1,float16,float16,0,0.2997173269589742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,128,1,float16,fp8,0,0.29532267649968463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,128,1,fp8,fp8,0,0.2709813316663106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,0,1,float16,fp8,0,0.29902400573094684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,0,1,fp8,fp8,0,0.27454400062561035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,128,1,float16,float16,0,0.30109333992004395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,0,1,float16,float16,0,0.30594666798909503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,128,1,float16,fp8,0,0.2993866602579753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,128,1,fp8,fp8,0,0.2807679971059163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,0,1,float16,fp8,0,0.30482133229573566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,0,1,fp8,fp8,0,0.28278400500615436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,128,1,fp8,fp8,0,0.26838932434717816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,0,1,float16,float16,0,0.17298134167989096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,128,1,float16,fp8,0,0.16755199432373047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,128,1,fp8,fp8,0,0.16269866625467935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,0,1,float16,fp8,0,0.17099199692408243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,0,1,fp8,fp8,0,0.16492266456286112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,128,1,float16,float16,0,0.15432000160217285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,0,1,float16,float16,0,0.15685333808263144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,128,1,float16,fp8,0,0.15227199594179788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,128,1,fp8,fp8,0,0.14197333653767905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,0,1,float16,fp8,0,0.15522666772206625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,0,1,fp8,fp8,0,0.14215466380119324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,128,1,float16,float16,0,0.15440000096956888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,0,1,float16,float16,0,0.1567573348681132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,128,1,float16,fp8,0,0.15473066767056784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,128,1,fp8,fp8,0,0.14405866463979086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,0,1,float16,fp8,0,0.15657066305478415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,0,1,fp8,fp8,0,0.14602667093276978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,128,1,float16,float16,0,0.15676800409952799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,0,1,float16,float16,0,0.15863999724388123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,128,1,float16,fp8,0,0.1561973293622335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,128,1,fp8,fp8,0,0.14506133397420248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,0,1,float16,fp8,0,0.1584106683731079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,0,1,fp8,fp8,0,0.1477120021979014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,128,1,float16,float16,0,0.1590933303038279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,0,1,float16,float16,0,0.16125866770744324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,128,1,float16,fp8,0,0.15914133191108704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,128,1,fp8,fp8,0,0.15226133664449057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,128,1,float16,float16,0,0.16867733001708984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,0,1,float16,fp8,0,0.16109866897265115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,0,1,fp8,fp8,0,0.15320533514022827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,128,1,float16,float16,0,0.09444800019264221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,128,1,fp8,fp8,0,0.09434133768081665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,0,1,float16,fp8,0,0.09436266620953877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,0,1,fp8,fp8,0,0.09521067142486572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,128,1,float16,float16,0,0.08674133817354839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,0,1,float16,float16,0,0.0869653324286143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,128,1,float16,fp8,0,0.08687466382980347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,128,1,fp8,fp8,0,0.07830399771531422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,0,1,float16,fp8,0,0.08870399991671245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,0,1,fp8,fp8,0,0.07948799928029378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,128,1,float16,float16,0,0.08713066577911377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,0,1,float16,float16,0,0.08733333150545756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,128,1,float16,fp8,0,0.08703466256459554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,128,1,fp8,fp8,0,0.07973866661389668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,0,1,float16,fp8,0,0.08660800258318584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,0,1,fp8,fp8,0,0.08081600069999695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,128,1,float16,float16,0,0.08560533324877422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,0,1,float16,float16,0,0.08782933155695598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,128,1,float16,fp8,0,0.08686932921409607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,128,1,fp8,fp8,0,0.07976000010967255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,0,1,float16,fp8,0,0.08732266227404277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,0,1,fp8,fp8,0,0.0813973347345988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,128,1,float16,float16,0,0.08905599514643352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,0,1,float16,float16,0,0.08993066350618999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,128,1,float16,fp8,0,0.08825600147247314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,128,1,fp8,fp8,0,0.08351999521255493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,0,1,float16,fp8,0,0.08827733000119527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,0,1,fp8,fp8,0,0.08477333188056946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,0,1,float16,float16,0,0.056362668673197426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,128,1,float16,fp8,0,0.054832001527150474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,128,1,fp8,fp8,0,0.05240533252557119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,0,1,float16,fp8,0,0.05611733098824819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,0,1,fp8,fp8,0,0.05426133175690969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,128,1,float16,float16,0,0.05379733443260193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,0,1,float16,float16,0,0.05438933273156484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,128,1,float16,fp8,0,0.05409599840641022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,128,1,fp8,fp8,0,0.047930667797724404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,128,1,float16,float16,0,0.0543039987484614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,0,1,fp8,fp8,0,0.04981866478919983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,128,1,float16,float16,0,0.05353599786758423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,0,1,float16,float16,0,0.053818667928377785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,128,1,float16,fp8,0,0.09285333752632141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,128,1,fp8,fp8,0,0.0499893327554067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,0,1,float16,fp8,0,0.05428266525268555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,0,1,fp8,fp8,0,0.050197333097457886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,128,1,float16,float16,0,0.053541332483291626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,0,1,float16,float16,0,0.05401599903901418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,0,1,float16,fp8,0,0.05385066568851471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,128,1,float16,fp8,0,0.05234666665395101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,128,1,fp8,fp8,0,0.049813335140546165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,0,1,float16,fp8,0,0.05453866720199585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,0,1,fp8,fp8,0,0.050426666935284935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,128,1,float16,float16,0,0.05403199791908264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,0,1,float16,float16,0,0.05403733253479004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,128,1,float16,fp8,0,0.0526506652434667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,128,1,float16,fp8,0,0.05356800059477488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,0,1,float16,fp8,0,0.054431999723116554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,0,1,fp8,fp8,0,0.04985600213209788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,128,1,float16,float16,0,0.03642666588226954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,0,1,float16,float16,0,0.03743999948104223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,128,1,float16,fp8,0,0.0369759996732076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,128,1,fp8,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,0,1,float16,fp8,0,0.037989333271980286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,0,1,fp8,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,128,1,float16,float16,0,0.037130666275819145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,0,1,float16,float16,0,0.03751466671625773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,128,1,float16,fp8,0,0.03734933336575826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,128,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,0,1,float16,fp8,0,0.03700266778469086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,0,1,fp8,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,128,1,float16,float16,0,0.035887998839219414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,0,1,float16,float16,0,0.09604266285896301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,128,1,float16,fp8,0,0.03669333209594091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,128,1,fp8,fp8,0,0.03442666679620743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,0,1,float16,fp8,0,0.038058665891488395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,0,1,fp8,fp8,0,0.034485332667827606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,128,1,float16,float16,0,0.03806400050719579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,0,1,float16,float16,0,0.03707200040419897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,128,1,float16,fp8,0,0.03692800054947535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,128,1,fp8,fp8,0,0.03569599986076355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,128,1,fp8,fp8,0,0.049786667029062905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,0,1,float16,fp8,0,0.03792533278465271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,0,1,fp8,fp8,0,0.03514133393764496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,128,1,float16,float16,0,0.036576000352700554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,0,1,float16,float16,0,0.03766933331886927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,128,1,float16,fp8,0,0.036576000352700554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,128,1,fp8,fp8,0,0.03585066646337509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,0,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,0,1,fp8,fp8,0,0.035802667339642845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,128,1,float16,float16,0,0.02757866680622101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,0,1,float16,float16,0,0.027669332921504974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,128,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,128,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,0,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,0,1,fp8,fp8,0,0.0276853342851003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,128,1,float16,float16,0,0.02770666778087616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,128,1,float16,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,128,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,0,1,float16,float16,0,0.03573333223660787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,0,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,128,1,float16,float16,0,0.025839999318122864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,0,1,float16,float16,0,0.02712533374627431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,128,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,128,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,0,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,0,1,fp8,fp8,0,0.026885333160559338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,128,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,0,1,float16,float16,0,0.027295999228954315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,128,1,float16,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,128,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,0,1,float16,fp8,0,0.02749866743882497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,0,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,0,1,float16,float16,0,0.02722666660944621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,128,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,0,1,float16,fp8,0,0.028207999964555103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,0,1,fp8,fp8,0,0.025621332228183746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,128,1,float16,float16,0,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,128,1,float16,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,128,1,fp8,fp8,0,0.02178666740655899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,128,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,128,1,float16,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,0,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,0,1,fp8,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,128,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,0,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,128,1,float16,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,128,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,0,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,128,1,fp8,fp8,0,0.02717333287000656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,128,1,float16,float16,0,0.021946666141351063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,0,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,128,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,0,1,float16,fp8,0,0.02367466688156128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,0,1,fp8,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,128,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,0,1,float16,float16,0,0.02327466756105423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,128,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,128,1,float16,float16,0,0.5171093146006266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,0,1,float16,float16,0,0.5083946784337362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,0,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,0,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,0,1,float16,fp8,0,0.509552001953125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,0,1,fp8,fp8,0,0.4461333354314168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,128,1,float16,float16,0,0.5227520068486532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,0,1,float16,float16,0,0.5140373309453329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,128,1,float16,fp8,0,0.5202986796696981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,128,1,fp8,fp8,0,0.4742773373921712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,128,1,float16,fp8,0,0.5155413150787354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,0,1,float16,fp8,0,0.511194666226705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,0,1,fp8,fp8,0,0.46513601144154865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,128,1,float16,float16,0,0.5248586734135946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,0,1,float16,float16,0,0.51746666431427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,128,1,float16,fp8,0,0.5240480105082194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,128,1,fp8,fp8,0,0.47833065191904706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,0,1,float16,fp8,0,0.5151413281758627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,0,1,fp8,fp8,0,0.467029333114624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,128,1,float16,float16,0,0.5331466595331827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,0,1,float16,float16,0,0.5251946846644083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,128,1,float16,fp8,0,0.5296479860941569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,128,1,fp8,fp8,0,0.5061759948730469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,0,1,float16,fp8,0,0.520581324895223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,0,1,fp8,fp8,0,0.4968213240305583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,0,1,float16,float16,0,0.2946293354034424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,128,1,fp8,fp8,0,0.4578026533126831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,128,1,float16,fp8,0,0.2923626701037089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,128,1,fp8,fp8,0,0.2797333399454753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,0,1,fp8,fp8,0,0.27270400524139404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,128,1,float16,float16,0,0.2985440095265706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,0,1,float16,float16,0,0.26570133368174237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,128,1,float16,fp8,0,0.26924266417821247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,0,1,float16,fp8,0,0.289466659228007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,0,1,float16,fp8,0,0.2655893365542094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,0,1,fp8,fp8,0,0.23500800132751465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,128,1,float16,float16,0,0.27105599641799927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,128,1,float16,float16,0,0.2688800096511841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,128,1,float16,fp8,0,0.26910400390625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,128,1,fp8,fp8,0,0.24717867374420166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,0,1,float16,fp8,0,0.263973335425059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,0,1,fp8,fp8,0,0.24174932638804117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,128,1,float16,float16,0,0.27192533016204834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,0,1,float16,float16,0,0.26735466718673706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,128,1,float16,fp8,0,0.27089067300160724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,128,1,fp8,fp8,0,0.24946133295694986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,0,1,float16,fp8,0,0.26822400093078613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,0,1,fp8,fp8,0,0.242576003074646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,128,1,float16,float16,0,0.2792106668154399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,0,1,float16,float16,0,0.2722880045572917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,128,1,float16,fp8,0,0.27569599946339923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,128,1,fp8,fp8,0,0.25810132424036664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,0,1,float16,fp8,0,0.2714080015818278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,0,1,fp8,fp8,0,0.2516000072161357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,128,1,float16,float16,0,0.1590666671593984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,0,1,float16,float16,0,0.15634133418401083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,128,1,float16,fp8,0,0.15668267011642456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,128,1,fp8,fp8,0,0.15018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,0,1,float16,fp8,0,0.1548373301823934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,0,1,fp8,fp8,0,0.14633599917093912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,128,1,float16,float16,0,0.14428266882896423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,0,1,float16,float16,0,0.14134400089581808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,128,1,float16,fp8,0,0.14492266376813254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,128,1,fp8,fp8,0,0.1304639975229899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,0,1,float16,fp8,0,0.14223466316858926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,0,1,fp8,fp8,0,0.12680000066757202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,128,1,float16,float16,0,0.14492266376813254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,128,1,fp8,fp8,0,0.23864533503850302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,128,1,float16,fp8,0,0.1446506679058075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,128,1,fp8,fp8,0,0.13301333785057068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,0,1,float16,fp8,0,0.14337066809336343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,0,1,fp8,fp8,0,0.12919466694196066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,128,1,float16,float16,0,0.1462613344192505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,0,1,float16,float16,0,0.1436853309472402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,128,1,float16,fp8,0,0.14668266971906027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,128,1,fp8,fp8,0,0.13638400038083395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,0,1,fp8,fp8,0,0.13449600338935852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,128,1,float16,float16,0,0.1490506629149119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,0,1,float16,float16,0,0.14090133706728616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,0,1,float16,float16,0,0.1458560029665629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,128,1,float16,fp8,0,0.1467573344707489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,128,1,fp8,fp8,0,0.1405119995276133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,0,1,float16,float16,0,0.2655893365542094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,128,1,float16,float16,0,0.09040000041325887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,0,1,float16,float16,0,0.08736000458399455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,128,1,float16,fp8,0,0.0886346697807312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,128,1,fp8,fp8,0,0.08726400136947632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,0,1,float16,fp8,0,0.08685333530108134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,0,1,fp8,fp8,0,0.08681066830952962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,128,1,float16,float16,0,0.08265066643555959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,0,1,float16,float16,0,0.0805920014778773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,128,1,float16,fp8,0,0.0811359981695811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,128,1,fp8,fp8,0,0.0746613343556722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,0,1,float16,fp8,0,0.14460800091425577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,0,1,float16,fp8,0,0.08080000181992848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,0,1,fp8,fp8,0,0.07247466842333476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,128,1,float16,float16,0,0.08245866497357686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,0,1,float16,float16,0,0.08085866769154866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,128,1,float16,fp8,0,0.08249600231647491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,128,1,fp8,fp8,0,0.075573335091273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,0,1,float16,fp8,0,0.08082666496435802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,0,1,fp8,fp8,0,0.07348266740640004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,128,1,float16,float16,0,0.08225066463152568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,0,1,float16,float16,0,0.08107199768225352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,128,1,float16,fp8,0,0.08182933429876964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,128,1,fp8,fp8,0,0.07506133119265239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,0,1,float16,fp8,0,0.08071466783682506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,0,1,fp8,fp8,0,0.07288533449172974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,128,1,float16,float16,0,0.08343467116355896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,0,1,float16,float16,0,0.0825386643409729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,128,1,float16,fp8,0,0.08311466872692108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,128,1,fp8,fp8,0,0.07841599980990092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,0,1,float16,fp8,0,0.08423466483751933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,0,1,fp8,fp8,0,0.07660266757011414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,128,1,float16,float16,0,0.05218133330345154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,0,1,float16,float16,0,0.05046399931112925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,128,1,float16,fp8,0,0.050106664498647056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,128,1,fp8,fp8,0,0.0496319979429245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,0,1,float16,fp8,0,0.05029866596062978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,0,1,fp8,fp8,0,0.04841066896915436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,128,1,float16,float16,0,0.04981866478919983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,0,1,float16,float16,0,0.04906133313973745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,128,1,float16,fp8,0,0.04865066707134247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,128,1,fp8,fp8,0,0.04394666850566864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,0,1,float16,fp8,0,0.048021331429481506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,0,1,fp8,fp8,0,0.043653334180514015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,128,1,float16,float16,0,0.04942933221658071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,0,1,float16,float16,0,0.04957866668701172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,128,1,float16,fp8,0,0.050250664353370667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,128,1,fp8,fp8,0,0.04596266647179922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,0,1,float16,fp8,0,0.04797333478927612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,0,1,fp8,fp8,0,0.04411733150482178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,128,1,float16,float16,0,0.04909333089987437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,0,1,float16,float16,0,0.048165331284205117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,128,1,float16,fp8,0,0.050250664353370667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,128,1,fp8,fp8,0,0.045696000258127846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,0,1,float16,fp8,0,0.1455946664015452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,0,1,float16,fp8,0,0.04775466521581014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,0,1,fp8,fp8,0,0.13734933733940125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,128,1,float16,float16,0,0.04934399823347727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,0,1,float16,float16,0,0.04922133187452952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,128,1,fp8,fp8,0,0.045994664231936135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,0,1,float16,fp8,0,0.0490880012512207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,128,1,float16,float16,0,0.036303999523321785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,0,1,float16,float16,0,0.03605333218971888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,128,1,float16,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,128,1,fp8,fp8,0,0.033733333150545754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,0,1,float16,fp8,0,0.03603200117746989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,0,1,fp8,fp8,0,0.04562666515509287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,0,1,fp8,fp8,0,0.03380800038576126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,128,1,float16,float16,0,0.03461866577466329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,0,1,float16,float16,0,0.034517332911491394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,128,1,float16,fp8,0,0.0349440003434817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,0,1,fp8,fp8,0,0.04586666822433472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,0,1,float16,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,0,1,fp8,fp8,0,0.03175999969244003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,128,1,float16,float16,0,0.033439998825391136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,0,1,float16,float16,0,0.03341866781314214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,128,1,float16,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,128,1,fp8,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,0,1,float16,fp8,0,0.03348266581694285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,0,1,fp8,fp8,0,0.031328000128269196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,128,1,float16,float16,0,0.03545066714286804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,0,1,float16,float16,0,0.03370666752258936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,128,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,128,1,fp8,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,0,1,float16,fp8,0,0.03579200059175491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,0,1,fp8,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,128,1,float16,float16,0,0.035375999907652535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,0,1,float16,float16,0,0.03537066777547201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,128,1,float16,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,128,1,fp8,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,0,1,float16,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,0,1,fp8,fp8,0,0.033439998825391136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,0,1,float16,float16,0,0.02516266703605652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,128,1,float16,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,128,1,fp8,fp8,0,0.02565866708755493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,0,1,fp8,fp8,0,0.02385066697994868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,128,1,float16,fp8,0,0.05002133548259735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,0,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,128,1,fp8,fp8,0,0.03379733363787333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,128,1,float16,fp8,0,0.02499199906984965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,128,1,fp8,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,0,1,fp8,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,0,1,float16,float16,0,0.02404266595840454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,128,1,float16,fp8,0,0.025813333690166473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,128,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,0,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,0,1,fp8,fp8,0,0.023589332898457844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,0,1,float16,float16,0,0.024266667664051056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,128,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,128,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,0,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,128,1,float16,float16,0,0.025455998877684276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,0,1,float16,float16,0,0.025706666211287182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,128,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,128,1,fp8,fp8,0,0.02568000058333079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,0,1,float16,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,0,1,fp8,fp8,0,0.023562667270501454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,128,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,128,1,float16,fp8,0,0.022389332453409832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,128,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,0,1,float16,fp8,0,0.021589333812395733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,128,1,float16,float16,0,0.024192000428835552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,128,1,float16,float16,0,0.020938667158285778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,0,1,float16,float16,0,0.019632000476121902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,128,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,128,1,float16,float16,0,0.025568000972270966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,128,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,0,1,float16,float16,0,0.021551998953024547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,128,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,128,1,fp8,fp8,0,0.02057066683967908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,128,1,float16,fp8,0,0.02165333429972331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,0,1,fp8,fp8,0,0.020234666764736176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,128,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,0,1,float16,fp8,0,0.02059200033545494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,128,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,0,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,128,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,128,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,128,1,float16,float16,0,0.02145066608985265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,0,1,fp8,fp8,0,0.019733333339293797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,128,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,128,1,float16,fp8,0,0.020261333634455998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,0,1,float16,float16,0,0.02000533292690913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,128,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,0,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,0,1,float16,float16,0,0.020981334149837494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,0,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,0,1,float16,float16,0,0.019482667247454327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,128,1,float16,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,128,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,128,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,128,1,float16,float16,0,0.021183999876181286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,128,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,128,1,fp8,fp8,0,0.019658666104078293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,0,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,0,1,float16,float16,0,0.01977066695690155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,128,1,float16,float16,0,0.2850133379300435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,0,1,float16,float16,0,0.2841493288675944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,128,1,float16,fp8,0,0.2842079997062683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,128,1,fp8,fp8,0,0.25330134232838947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,0,1,float16,fp8,0,0.2831679979960124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,0,1,fp8,fp8,0,0.2539520064989726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,128,1,float16,float16,0,0.2870560089747111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,0,1,float16,float16,0,0.28541332483291626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,128,1,float16,fp8,0,0.28480533758799237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,128,1,fp8,fp8,0,0.2647413412729899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,0,1,float16,fp8,0,0.28492265939712524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,0,1,fp8,fp8,0,0.2632906635602315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,128,1,float16,float16,0,0.2887093424797058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,0,1,float16,float16,0,0.2879306674003601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,128,1,float16,fp8,0,0.2882293264071147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,128,1,fp8,fp8,0,0.26558399200439453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,0,1,float16,fp8,0,0.286298672358195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,0,1,fp8,fp8,0,0.2659040093421936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,128,1,float16,float16,0,0.29148799180984497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,0,1,float16,float16,0,0.2908053398132324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,128,1,float16,fp8,0,0.2901066740353902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,128,1,fp8,fp8,0,0.2752373417218526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,0,1,float16,fp8,0,0.29045865933100384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,128,1,float16,float16,0,0.16485333442687988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,0,1,float16,float16,0,0.16457600394884744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,128,1,float16,fp8,0,0.1630346675713857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,128,1,fp8,fp8,0,0.15849600235621134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,0,1,float16,fp8,0,0.16284799575805664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,0,1,fp8,fp8,0,0.15808533628781637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,128,1,float16,float16,0,0.15187733372052512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,0,1,float16,float16,0,0.15041066209475198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,128,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,128,1,fp8,fp8,0,0.13500266273816428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,0,1,float16,fp8,0,0.15030933419863382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,0,1,fp8,fp8,0,0.13499200344085693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,0,1,fp8,fp8,0,0.27560534079869586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,0,1,float16,float16,0,0.15038399895032248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,128,1,float16,fp8,0,0.1509119967619578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,128,1,fp8,fp8,0,0.13915733496348062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,0,1,float16,fp8,0,0.1509866714477539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,0,1,fp8,fp8,0,0.13870400190353394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,128,1,float16,float16,0,0.1525920033454895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,0,1,float16,float16,0,0.1534880002339681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,128,1,float16,fp8,0,0.15269866585731506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,128,1,fp8,fp8,0,0.14268267154693604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,0,1,float16,fp8,0,0.1520799994468689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,0,1,fp8,fp8,0,0.14136000474294028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,128,1,float16,float16,0,0.15449066956837973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,0,1,float16,float16,0,0.1544426679611206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,128,1,float16,fp8,0,0.15467733144760132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,128,1,fp8,fp8,0,0.14642666776974997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,0,1,float16,fp8,0,0.1546346644560496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,0,1,fp8,fp8,0,0.1474560002485911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,128,1,float16,float16,0,0.092357337474823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,0,1,float16,float16,0,0.09123733639717102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,128,1,float16,fp8,0,0.09139733513196309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,128,1,float16,fp8,0,0.15043200055758157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,128,1,fp8,fp8,0,0.0906880001227061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,0,1,float16,fp8,0,0.0900266667207082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,0,1,fp8,fp8,0,0.09007466832796733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,128,1,float16,float16,0,0.08336533109347026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,128,1,float16,float16,0,0.15085333585739136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,128,1,float16,fp8,0,0.08496000369389851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,128,1,fp8,fp8,0,0.07709333300590515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,0,1,float16,fp8,0,0.08479467034339905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,0,1,fp8,fp8,0,0.07644266883532207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,128,1,float16,float16,0,0.08340266346931458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,0,1,float16,float16,0,0.08311466872692108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,128,1,float16,fp8,0,0.08345066507657369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,128,1,fp8,fp8,0,0.07668800155321757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,0,1,float16,fp8,0,0.08332799871762593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,0,1,fp8,fp8,0,0.07615466912587483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,128,1,float16,float16,0,0.08478400111198425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,0,1,float16,float16,0,0.08418666323026021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,128,1,float16,fp8,0,0.08481599887212117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,128,1,fp8,fp8,0,0.07666666805744171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,0,1,float16,fp8,0,0.08516266942024231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,0,1,fp8,fp8,0,0.0765226682027181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,128,1,float16,float16,0,0.08554133772850037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,0,1,float16,float16,0,0.08494399984677632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,128,1,float16,fp8,0,0.08500799536705017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,128,1,fp8,fp8,0,0.07993599772453308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,0,1,float16,fp8,0,0.08496000369389851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,0,1,fp8,fp8,0,0.07909333209196727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,128,1,float16,float16,0,0.052853330969810486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,0,1,float16,float16,0,0.05226666728655497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,128,1,fp8,fp8,0,0.05035200218359629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,0,1,float16,fp8,0,0.052485331892967224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,0,1,fp8,fp8,0,0.05045333504676819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,128,1,float16,float16,0,0.05076266825199127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,0,1,float16,float16,0,0.05011733373006185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,0,1,float16,float16,0,0.08338133494059245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,128,1,fp8,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,0,1,float16,fp8,0,0.050373335679372154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,0,1,fp8,fp8,0,0.047322665651639305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,128,1,float16,float16,0,0.05189866820971171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,0,1,float16,float16,0,0.050741334756215416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,128,1,float16,fp8,0,0.0517546683549881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,128,1,fp8,fp8,0,0.046538665890693665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,0,1,float16,fp8,0,0.0518506666024526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,0,1,fp8,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,128,1,float16,float16,0,0.05176533261934916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,0,1,float16,float16,0,0.05012266834576925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,128,1,float16,fp8,0,0.05061866839726766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,128,1,fp8,fp8,0,0.04620266457398733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,0,1,float16,fp8,0,0.05072000126043955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,0,1,fp8,fp8,0,0.04779199759165446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,128,1,float16,float16,0,0.05190399785836538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,0,1,float16,float16,0,0.051818668842315674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,128,1,float16,fp8,0,0.05213866631189982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,128,1,fp8,fp8,0,0.047685335079828896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,0,1,float16,fp8,0,0.05076266825199127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,0,1,fp8,fp8,0,0.04762666424115499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,128,1,float16,float16,0,0.03358400116364161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,0,1,float16,float16,0,0.033573334415753685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,128,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,128,1,fp8,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,0,1,float16,fp8,0,0.033488000432650246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,0,1,fp8,fp8,0,0.03163733333349228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,128,1,float16,fp8,0,0.05240533252557119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,0,1,float16,float16,0,0.031680000325044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,128,1,float16,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,128,1,fp8,fp8,0,0.03109866629044215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,0,1,float16,fp8,0,0.03146133323510488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,0,1,fp8,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,128,1,float16,fp8,0,0.04982399940490723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,0,1,float16,float16,0,0.03149333347876867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,128,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,128,1,fp8,fp8,0,0.031285333136717476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,0,1,float16,fp8,0,0.03123733401298523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,0,1,fp8,fp8,0,0.031258667508761086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,128,1,float16,float16,0,0.03219199925661087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,0,1,float16,float16,0,0.03382933388153712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,128,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,128,1,fp8,fp8,0,0.03196266790231069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,0,1,float16,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,0,1,fp8,fp8,0,0.032618666688601174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,128,1,float16,float16,0,0.033674667278925575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,0,1,float16,float16,0,0.03350933392842611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,128,1,float16,fp8,0,0.0335359995563825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,128,1,fp8,fp8,0,0.03166399896144867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,0,1,float16,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,0,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,128,1,float16,float16,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,0,1,float16,float16,0,0.025226667523384094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,128,1,float16,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,128,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,0,1,float16,fp8,0,0.026026666164398193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,0,1,fp8,fp8,0,0.025642665723959606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,128,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,128,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,128,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,0,1,float16,fp8,0,0.025013332565625507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,0,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,128,1,float16,float16,0,0.02515200028816859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,0,1,float16,float16,0,0.025514667232831318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,128,1,float16,float16,0,0.03340800106525421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,128,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,0,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,128,1,float16,float16,0,0.025514667232831318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,0,1,float16,float16,0,0.025626666843891144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,128,1,float16,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,128,1,fp8,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,0,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,0,1,fp8,fp8,0,0.02367999901374181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,128,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,0,1,float16,float16,0,0.02743999908367793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,128,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,128,1,float16,float16,0,0.03350399931271871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,0,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,0,1,fp8,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,0,1,float16,float16,0,0.017845333864291508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,128,1,float16,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,0,1,float16,float16,0,0.01886933296918869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,128,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,0,1,float16,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,128,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,0,1,float16,float16,0,0.018858666221300762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,0,1,float16,fp8,0,0.019861333072185516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,0,1,fp8,fp8,0,0.018330667167901993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,128,1,float16,fp8,0,0.018613333503405254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,128,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,128,1,float16,fp8,0,0.018794666975736618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,0,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,0,1,fp8,fp8,0,0.018746666610240936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,128,1,float16,float16,0,0.018842666099468868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,128,1,float16,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,0,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,128,1,float16,float16,0,0.01775466650724411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,128,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,0,1,float16,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,128,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,128,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,0,1,float16,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,128,1,float16,float16,0,0.01766933376590411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,128,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,128,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,128,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,128,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,128,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,128,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,128,1,float16,float16,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,128,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,128,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,0,1,float16,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,0,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,128,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,128,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,0,1,float16,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,128,1,float16,float16,0,0.2053920030593872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,0,1,float16,float16,0,0.20459733406702676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,128,1,float16,fp8,0,0.20394132534662882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,128,1,fp8,fp8,0,0.18013866742451987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,0,1,float16,fp8,0,0.20435200134913126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,0,1,fp8,fp8,0,0.1811573306719462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,128,1,float16,float16,0,0.20549333095550537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,0,1,float16,float16,0,0.2058239976565043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,128,1,float16,fp8,0,0.20472000042597452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,128,1,fp8,fp8,0,0.1844586730003357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,0,1,float16,fp8,0,0.20414400100708008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,0,1,fp8,fp8,0,0.1837973395983378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,128,1,float16,float16,0,0.20600533485412598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,0,1,float16,float16,0,0.205567995707194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,128,1,float16,fp8,0,0.2044853369394938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,0,1,float16,fp8,0,0.20574400822321573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,0,1,fp8,fp8,0,0.18593599398930868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,128,1,float16,float16,0,0.20796799659729004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,0,1,float16,float16,0,0.20799465974171957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,128,1,float16,fp8,0,0.20827200015385947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,128,1,fp8,fp8,0,0.19222933053970337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,0,1,float16,fp8,0,0.20672533909479776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,0,1,fp8,fp8,0,0.19179733594258627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,0,1,float16,float16,0,0.1179093321164449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,128,1,float16,fp8,0,0.11799466609954834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,128,1,fp8,fp8,0,0.11268799503644307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,0,1,float16,fp8,0,0.11711999773979187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,0,1,fp8,fp8,0,0.11355732878049214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,128,1,float16,float16,0,0.11024000247319539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,128,1,fp8,fp8,0,0.18616533279418945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,128,1,float16,fp8,0,0.10981333255767822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,128,1,fp8,fp8,0,0.09718933701515198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,0,1,float16,fp8,0,0.11115200320879619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,0,1,fp8,fp8,0,0.0974826713403066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,128,1,float16,float16,0,0.11169067025184631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,0,1,float16,float16,0,0.11126933495203654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,128,1,float16,fp8,0,0.10937600334485371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,128,1,fp8,fp8,0,0.09928533434867859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,0,1,float16,fp8,0,0.10943999886512756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,0,1,fp8,fp8,0,0.09834133585294087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,128,1,float16,float16,0,0.11023466785748799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,0,1,float16,float16,0,0.10985066493352254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,128,1,float16,fp8,0,0.11008000373840332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,128,1,fp8,fp8,0,0.09914132952690125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,0,1,float16,fp8,0,0.11172266801198323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,0,1,fp8,fp8,0,0.0993226667245229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,128,1,float16,float16,0,0.11189333597819011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,0,1,float16,float16,0,0.11291733384132385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,128,1,float16,fp8,0,0.11124799648920695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,128,1,fp8,fp8,0,0.10206400354703267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,0,1,float16,fp8,0,0.11110400160153706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,0,1,fp8,fp8,0,0.10172800223032634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,128,1,float16,float16,0,0.06457066535949707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,0,1,float16,float16,0,0.06452266871929169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,128,1,float16,fp8,0,0.0650186687707901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,128,1,fp8,fp8,0,0.06028266747792562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,0,1,float16,fp8,0,0.0658240020275116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,0,1,fp8,fp8,0,0.060165335734685264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,128,1,float16,float16,0,0.06407466530799866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,0,1,float16,float16,0,0.06280000011126201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,128,1,float16,fp8,0,0.062319998939832054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,128,1,fp8,fp8,0,0.058245331048965454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,0,1,float16,fp8,0,0.062650665640831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,0,1,fp8,fp8,0,0.057664001981417336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,128,1,float16,float16,0,0.06243200103441874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,0,1,float16,float16,0,0.06389333307743073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,128,1,float16,fp8,0,0.062277331948280334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,128,1,fp8,fp8,0,0.057429333527882896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,0,1,float16,fp8,0,0.06409599880377452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,0,1,fp8,fp8,0,0.05755733450253805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,128,1,float16,float16,0,0.06426133215427399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,0,1,float16,float16,0,0.06364266574382782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,128,1,float16,fp8,0,0.06412800153096516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,128,1,fp8,fp8,0,0.0581226646900177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,0,1,float16,fp8,0,0.0641599992911021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,0,1,fp8,fp8,0,0.05809600154558817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,128,1,float16,float16,0,0.06267733375231425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,0,1,float16,float16,0,0.06358933448791504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,128,1,float16,fp8,0,0.06307200094064076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,128,1,fp8,fp8,0,0.058261334896087646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,0,1,float16,fp8,0,0.0644053320089976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,0,1,fp8,fp8,0,0.058042665322621666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,0,1,float16,float16,0,0.04186666508515676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,128,1,float16,fp8,0,0.04154133299986521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,128,1,fp8,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,0,1,float16,float16,0,0.11149332920710246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,0,1,fp8,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,128,1,float16,float16,0,0.04005333284536997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,0,1,float16,float16,0,0.03949866692225138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,128,1,float16,fp8,0,0.039861333866914116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,128,1,float16,float16,0,0.04154133299986521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,128,1,fp8,fp8,0,0.03745066622893015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,0,1,float16,fp8,0,0.039887999494870506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,0,1,fp8,fp8,0,0.036864000062147774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,0,1,float16,fp8,0,0.03977066775163015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,0,1,float16,float16,0,0.03972800076007843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,128,1,float16,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,128,1,fp8,fp8,0,0.03749866783618927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,128,1,float16,float16,0,0.11848533153533936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,0,1,float16,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,0,1,fp8,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,0,1,float16,float16,0,0.03976000100374222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,128,1,float16,fp8,0,0.039664000272750854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,128,1,fp8,fp8,0,0.03771200031042099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,128,1,float16,float16,0,0.039461334546407066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,0,1,fp8,fp8,0,0.03886399914820989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,128,1,float16,float16,0,0.040762667854626976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,0,1,float16,float16,0,0.04016000032424927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,128,1,float16,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,128,1,float16,float16,0,0.03951466580231985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,0,1,float16,fp8,0,0.04292266567548116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,0,1,fp8,fp8,0,0.0378560001651446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,128,1,float16,float16,0,0.028570666909217834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,128,1,float16,fp8,0,0.029018667836983997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,0,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,0,1,float16,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,0,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,128,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,0,1,float16,float16,0,0.027600000301996868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,128,1,float16,fp8,0,0.027552001178264618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,128,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,0,1,float16,fp8,0,0.02769600103298823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,0,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,128,1,float16,float16,0,0.02741866558790207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,128,1,fp8,fp8,0,0.02834133307139079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,0,1,float16,float16,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,128,1,float16,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,128,1,fp8,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,0,1,float16,fp8,0,0.029306667546431225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,0,1,fp8,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,128,1,float16,float16,0,0.02770666778087616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,128,1,float16,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,128,1,fp8,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,0,1,float16,fp8,0,0.0284853329261144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,0,1,fp8,fp8,0,0.02741866558790207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,128,1,float16,float16,0,0.028277332584063213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,0,1,float16,float16,0,0.02755733331044515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,128,1,float16,fp8,0,0.028170667588710785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,128,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,0,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,0,1,fp8,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,128,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,0,1,float16,float16,0,0.021882665654023487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,128,1,float16,fp8,0,0.02199466774861018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,128,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,0,1,float16,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,0,1,fp8,fp8,0,0.020879998803138733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,0,1,float16,float16,0,0.02231466770172119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,128,1,float16,fp8,0,0.020799999435742695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,128,1,fp8,fp8,0,0.03773866593837738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,0,1,float16,float16,0,0.029306667546431225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,128,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,0,1,float16,float16,0,0.01952533299724261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,128,1,float16,fp8,0,0.020874666670958202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,0,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,0,1,fp8,fp8,0,0.020549333343903225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,128,1,float16,float16,0,0.02103466788927714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,128,1,float16,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,0,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,128,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,128,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,128,1,fp8,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,128,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,0,1,float16,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,128,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,0,1,float16,fp8,0,0.017840000490347546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,128,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,128,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,128,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,128,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,128,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,0,1,float16,fp8,0,0.015909332782030106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,128,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,0,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,128,1,float16,fp8,0,0.016186666985352833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,0,1,float16,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,0,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,128,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,128,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,0,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,0,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,0,1,float16,fp8,0,0.016106666376193363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,0,1,float16,fp8,0,0.01573333392540614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,0,1,fp8,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,128,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,0,1,float16,float16,0,0.015552000453074774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,128,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,0,1,fp8,fp8,0,0.015829333414634068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,128,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,0,1,fp8,fp8,0,0.01618133361140887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,0,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,128,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,128,1,float16,float16,0,0.16461333632469177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,0,1,float16,float16,0,0.16291200121243796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,128,1,float16,fp8,0,0.16366933782895407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,0,1,float16,fp8,0,0.16313599546750387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,0,1,fp8,fp8,0,0.1444480021794637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,128,1,float16,float16,0,0.16364799936612448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,0,1,float16,float16,0,0.1644053359826406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,128,1,float16,fp8,0,0.16366400321324667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,128,1,fp8,fp8,0,0.1459946632385254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,0,1,float16,fp8,0,0.16458666324615479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,0,1,fp8,fp8,0,0.14601066708564758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,128,1,float16,float16,0,0.16486400365829468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,0,1,float16,float16,0,0.16473600268363953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,128,1,float16,fp8,0,0.16453333695729574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,128,1,fp8,fp8,0,0.14501333236694336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,0,1,float16,fp8,0,0.1646346648534139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,0,1,fp8,fp8,0,0.14461333552996317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,128,1,float16,float16,0,0.1665173371632894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,0,1,float16,float16,0,0.16470932960510254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,128,1,float16,fp8,0,0.1642453372478485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,128,1,fp8,fp8,0,0.14756799737612405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,0,1,float16,fp8,0,0.1646506687005361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,0,1,fp8,fp8,0,0.14871999621391296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,128,1,float16,float16,0,0.09078933795293172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,0,1,float16,float16,0,0.090938667456309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,128,1,fp8,fp8,0,0.08452799916267395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,128,1,float16,float16,0,0.015658666690190632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,0,1,float16,fp8,0,0.09098666906356812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,0,1,fp8,fp8,0,0.08386133114496867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,128,1,float16,float16,0,0.08902399738629659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,0,1,float16,float16,0,0.08913066983222961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,128,1,fp8,fp8,0,0.1446560025215149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,128,1,float16,fp8,0,0.08903466661771138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,128,1,fp8,fp8,0,0.08116266628106435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,0,1,float16,fp8,0,0.08907199899355571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,0,1,fp8,fp8,0,0.08085333307584126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,128,1,float16,float16,0,0.0886346697807312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,0,1,float16,float16,0,0.08899733424186707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,128,1,float16,fp8,0,0.08897599577903748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,128,1,fp8,fp8,0,0.08092799782752991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,0,1,float16,fp8,0,0.08906132976214091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,0,1,fp8,fp8,0,0.08052266637484233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,128,1,float16,float16,0,0.08889599641164143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,0,1,float16,float16,0,0.08898666501045227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,128,1,float16,fp8,0,0.08912000060081482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,128,1,fp8,fp8,0,0.08074666559696198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,128,1,float16,fp8,0,0.09086933732032776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,0,1,fp8,fp8,0,0.08076266447703044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,128,1,float16,float16,0,0.09070400396982829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,0,1,float16,float16,0,0.09098666906356812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,128,1,float16,fp8,0,0.08894933263460796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,128,1,fp8,fp8,0,0.0830506682395935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,0,1,fp8,fp8,0,0.08107733229796092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,128,1,float16,float16,0,0.05454400181770325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,128,1,float16,fp8,0,0.05455466608206431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,128,1,fp8,fp8,0,0.05046399931112925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,0,1,float16,fp8,0,0.05283733208974203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,0,1,fp8,fp8,0,0.05023466547330221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,128,1,float16,float16,0,0.052015999952952065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,0,1,float16,float16,0,0.051829333106676735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,128,1,float16,fp8,0,0.05220800141493479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,0,1,float16,float16,0,0.05433600147565206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,128,1,fp8,fp8,0,0.048058668772379555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,0,1,float16,fp8,0,0.05315199991067251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,0,1,fp8,fp8,0,0.04804266492525736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,128,1,float16,float16,0,0.05294933418432871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,0,1,float16,float16,0,0.052111998200416565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,128,1,float16,fp8,0,0.05349333087603251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,0,1,float16,fp8,0,0.05216533442338308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,0,1,fp8,fp8,0,0.04796266555786133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,128,1,float16,float16,0,0.054042667150497437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,0,1,float16,fp8,0,0.09002133210500081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,0,1,float16,float16,0,0.054042667150497437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,128,1,float16,fp8,0,0.0526506652434667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,128,1,fp8,fp8,0,0.049786667029062905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,0,1,float16,fp8,0,0.05219733218352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,0,1,fp8,fp8,0,0.04974933465321859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,128,1,float16,float16,0,0.052239999175071716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,0,1,float16,float16,0,0.052005335688591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,128,1,float16,fp8,0,0.05202133456865946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,0,1,float16,fp8,0,0.08905599514643352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,0,1,float16,fp8,0,0.05413866539796194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,0,1,fp8,fp8,0,0.05009066561857859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,128,1,float16,float16,0,0.03563733398914337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,0,1,float16,float16,0,0.03541333228349686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,128,1,float16,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,128,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,0,1,float16,fp8,0,0.0359946663180987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,0,1,fp8,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,128,1,float16,float16,0,0.03532266616821289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,0,1,float16,float16,0,0.033589333295822144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,128,1,float16,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,128,1,fp8,fp8,0,0.0337119996547699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,0,1,float16,fp8,0,0.035631999373435974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,0,1,fp8,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,128,1,float16,float16,0,0.03535466641187668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,0,1,float16,float16,0,0.03356266766786575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,128,1,float16,fp8,0,0.03457066665093104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,128,1,fp8,fp8,0,0.03178666780392329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,0,1,float16,fp8,0,0.034186666210492454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,0,1,fp8,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,128,1,float16,float16,0,0.033887999753157295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,0,1,float16,float16,0,0.03545066714286804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,128,1,float16,fp8,0,0.03450666616360346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,128,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,0,1,float16,fp8,0,0.03603200117746989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,0,1,fp8,fp8,0,0.033546666304270424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,128,1,float16,float16,0,0.03572266548871994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,0,1,float16,float16,0,0.035360001027584076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,128,1,float16,fp8,0,0.03590933233499527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,128,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,0,1,float16,fp8,0,0.03590933233499527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,0,1,fp8,fp8,0,0.03377600014209747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,128,1,float16,float16,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,0,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,128,1,fp8,fp8,0,0.04785599807898203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,128,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,0,1,fp8,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,128,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,0,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,128,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,128,1,fp8,fp8,0,0.023520000278949738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,0,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,128,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,0,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,128,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,128,1,fp8,fp8,0,0.02181333303451538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,128,1,fp8,fp8,0,0.0498933345079422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,128,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,128,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,128,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,0,1,float16,fp8,0,0.0245919997493426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,0,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,128,1,float16,float16,0,0.023525332411130268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,128,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,0,1,float16,fp8,0,0.023589332898457844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,0,1,fp8,fp8,0,0.02277333289384842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,128,1,float16,float16,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,0,1,float16,fp8,0,0.021903999149799347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,0,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,128,1,float16,float16,0,0.019989332805077236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,0,1,float16,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,128,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,128,1,float16,fp8,0,0.02070933332045873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,128,1,fp8,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,128,1,float16,fp8,0,0.02359466751416524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,128,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,128,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,128,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,128,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,0,1,float16,float16,0,0.020901332298914593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,128,1,float16,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,128,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,0,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,0,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,128,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,0,1,float16,float16,0,0.016645333419243496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,0,1,float16,float16,0,0.021183999876181286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,128,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,128,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,128,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,128,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,128,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,128,1,float16,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,128,1,float16,fp8,0,0.016549333930015564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,0,1,float16,fp8,0,0.015909332782030106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,0,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,0,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,128,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,128,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,0,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,0,1,float16,float16,0,0.01587733378012975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,0,1,float16,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,128,1,float16,float16,0,0.14281066258748373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,128,1,float16,fp8,0,0.1420693298180898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,128,1,fp8,fp8,0,0.12847466270128885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,128,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,0,1,fp8,fp8,0,0.12929600477218628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,128,1,float16,float16,0,0.14078399538993835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,0,1,float16,float16,0,0.14195199807484946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,128,1,float16,fp8,0,0.14031466841697693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,128,1,fp8,fp8,0,0.1295253336429596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,0,1,float16,fp8,0,0.14045866330464682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,0,1,fp8,fp8,0,0.130021333694458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,128,1,float16,float16,0,0.14199999968210855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,0,1,float16,float16,0,0.1411679983139038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,128,1,float16,fp8,0,0.14199466506640115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,128,1,fp8,fp8,0,0.12805333733558655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,0,1,float16,fp8,0,0.14223999778429666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,0,1,fp8,fp8,0,0.12998933593432108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,128,1,float16,float16,0,0.14235732952753702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,0,1,float16,float16,0,0.1421173314253489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,128,1,float16,fp8,0,0.1406880021095276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,128,1,fp8,fp8,0,0.12875733772913614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,0,1,float16,fp8,0,0.1414293348789215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,0,1,fp8,fp8,0,0.1288746694723765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,0,1,float16,fp8,0,0.14046399792035422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,0,1,float16,float16,0,0.07867200175921123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,128,1,float16,fp8,0,0.07889066636562347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,128,1,fp8,fp8,0,0.07361599802970886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,0,1,float16,fp8,0,0.07920533418655396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,0,1,fp8,fp8,0,0.07427200178305308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,128,1,float16,float16,0,0.0788266658782959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,0,1,float16,float16,0,0.07868800063927968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,128,1,float16,fp8,0,0.07891199986139934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,128,1,fp8,fp8,0,0.07256000240643819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,0,1,float16,fp8,0,0.07687999804814656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,0,1,fp8,fp8,0,0.07231466472148895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,128,1,float16,float16,0,0.07815466821193695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,0,1,float16,float16,0,0.07855466504891713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,128,1,float16,fp8,0,0.07891199986139934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,128,1,fp8,fp8,0,0.07218666871388753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,0,1,float16,fp8,0,0.07874133189519246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,0,1,fp8,fp8,0,0.07258133093516032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,128,1,float16,float16,0,0.07857066889603932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,0,1,float16,float16,0,0.07869866490364075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,128,1,float16,fp8,0,0.07900266846021016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,128,1,fp8,fp8,0,0.07426133255163829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,0,1,float16,fp8,0,0.07865066826343536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,0,1,fp8,fp8,0,0.0745066652695338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,128,1,float16,float16,0,0.07874133189519246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,0,1,float16,float16,0,0.0786186655362447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,128,1,float16,float16,0,0.07976000010967255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,128,1,fp8,fp8,0,0.072543998559316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,0,1,float16,fp8,0,0.08061866462230682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,0,1,fp8,fp8,0,0.07461333274841309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,128,1,float16,float16,0,0.048250665267308555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,0,1,float16,float16,0,0.05003199974695841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,128,1,float16,fp8,0,0.04796266555786133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,0,1,float16,float16,0,0.1402666668097178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,0,1,float16,fp8,0,0.048010667165120445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,0,1,fp8,fp8,0,0.04608533283074697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,128,1,float16,float16,0,0.047770669062932335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,0,1,float16,float16,0,0.04740799963474274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,128,1,float16,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,128,1,fp8,fp8,0,0.04577066500981649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,0,1,float16,fp8,0,0.047983999053637184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,0,1,fp8,fp8,0,0.04558933277924856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,128,1,float16,float16,0,0.04832000037034353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,0,1,float16,float16,0,0.04742933313051859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,128,1,float16,fp8,0,0.04830400149027506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,128,1,fp8,fp8,0,0.04585599899291992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,0,1,float16,fp8,0,0.04804799954096476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,0,1,fp8,fp8,0,0.04422933359940847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,128,1,float16,float16,0,0.04781333108743032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,0,1,float16,float16,0,0.04782933493455251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,128,1,float16,fp8,0,0.04762133459250132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,128,1,fp8,fp8,0,0.04571733375390371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,0,1,float16,fp8,0,0.04795200129350027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,128,1,fp8,fp8,0,0.044293334086736046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,128,1,float16,float16,0,0.0480373352766037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,0,1,float16,float16,0,0.04974933465321859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,128,1,float16,fp8,0,0.0496319979429245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,128,1,float16,fp8,0,0.07871466875076294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,0,1,float16,fp8,0,0.050213331977526345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,128,1,float16,float16,0,0.030741333961486816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,0,1,float16,float16,0,0.030229332546393078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,0,1,fp8,fp8,0,0.045824001232783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,128,1,fp8,fp8,0,0.029904000461101532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,0,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,0,1,fp8,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,128,1,float16,float16,0,0.03156266609827677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,128,1,fp8,fp8,0,0.045935998360315956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,0,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,128,1,float16,fp8,0,0.031285333136717476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,128,1,fp8,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,0,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,0,1,fp8,fp8,0,0.029135999580224354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,128,1,float16,float16,0,0.031231999397277832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,0,1,float16,float16,0,0.02959466725587845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,128,1,float16,fp8,0,0.03313066562016805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,128,1,fp8,fp8,0,0.029658667743206024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,0,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,0,1,fp8,fp8,0,0.029653333127498627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,128,1,float16,float16,0,0.029706666866938274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,0,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,128,1,float16,fp8,0,0.031173333525657654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,128,1,fp8,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,0,1,float16,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,0,1,fp8,fp8,0,0.02972800036271413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,128,1,float16,float16,0,0.03072533259789149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,0,1,float16,float16,0,0.02956799914439519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,128,1,float16,fp8,0,0.03158933420976003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,128,1,fp8,fp8,0,0.029125332832336426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,0,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,0,1,fp8,fp8,0,0.029135999580224354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,0,1,float16,float16,0,0.021216000119845074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,128,1,float16,fp8,0,0.03178666780392329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,128,1,fp8,fp8,0,0.024005333582560223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,0,1,float16,fp8,0,0.02370133250951767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,0,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,128,1,float16,float16,0,0.021498667697111767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,0,1,float16,float16,0,0.021541332205136616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,0,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,128,1,float16,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,128,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,128,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,128,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,0,1,fp8,fp8,0,0.04584533472855886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,128,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,128,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,0,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,128,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,128,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,0,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,128,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,0,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,0,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,128,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,0,1,float16,float16,0,0.01844266677896182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,128,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,128,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,128,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,128,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,128,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,0,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,128,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,0,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,0,1,fp8,fp8,0,0.01764800027012825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,128,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,0,1,float16,float16,0,0.017765333255132038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,128,1,float16,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,128,1,fp8,fp8,0,0.017621333400408428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,0,1,float16,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,128,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,0,1,float16,float16,0,0.019551999866962433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,128,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,128,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,128,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,0,1,float16,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,128,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,0,1,float16,fp8,0,0.015919999529918034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,0,1,fp8,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,128,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,128,1,fp8,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,128,1,float16,fp8,0,0.01643199970324834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,128,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,0,1,float16,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,128,1,float16,float16,0,0.016623999923467636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,0,1,float16,float16,0,0.01588800052801768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,128,1,fp8,fp8,0,0.01575999955336253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,0,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,0,1,fp8,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,0,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,128,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,128,1,fp8,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,0,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,128,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,128,1,float16,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,0,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,0,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,0,1,float16,float16,0,0.1218986709912618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,128,1,float16,fp8,0,0.1236853301525116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,128,1,fp8,fp8,0,0.11158933242162068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,0,1,float16,fp8,0,0.12378133336702983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,0,1,fp8,fp8,0,0.11170132954915364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,128,1,float16,float16,0,0.12204266587893169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,0,1,float16,float16,0,0.12179733316103618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,128,1,float16,fp8,0,0.12214932839075725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,128,1,fp8,fp8,0,0.11133333047231038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,0,1,float16,fp8,0,0.12354666988054912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,64,0,1,fp8,fp8,0,0.11149866382280986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,128,1,float16,float16,0,0.12272000312805176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,0,1,float16,float16,0,0.12172266840934753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,128,1,float16,fp8,0,0.12363732854525249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,128,1,fp8,fp8,0,0.11154666543006897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,0,1,float16,fp8,0,0.1237493356068929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,64,0,1,fp8,fp8,0,0.11150399843851726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,128,1,float16,float16,0,0.12198932965596516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,0,1,float16,float16,0,0.1216266651948293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,128,1,float16,fp8,0,0.12190933028856914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,128,1,fp8,fp8,0,0.1116426686445872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,0,1,float16,fp8,0,0.12250666817029317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,128,1,float16,float16,0,0.06839466591676076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,0,0.06881600121657054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,128,1,float16,fp8,0,0.06851199766000111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,128,1,fp8,fp8,0,0.06412266691525777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,0,0.06821333368619283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,64,0,1,fp8,fp8,0,0.062362665931383766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,128,1,float16,float16,0,0.0685280015071233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,0,1,float16,float16,0,0.06843199829260509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,128,1,float16,fp8,0,0.06857599814732869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,64,128,1,float16,float16,0,0.12184533476829529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,0,1,float16,fp8,0,0.06852266689141591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,0,1,fp8,fp8,0,0.06244266529877981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,128,1,float16,float16,0,0.0684853345155716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,64,0,1,fp8,fp8,0,0.11183466513951619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,128,1,float16,fp8,0,0.06825066606203715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,128,1,fp8,fp8,0,0.06398933132489522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,0,1,float16,fp8,0,0.068271999557813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,0,1,fp8,fp8,0,0.0621919979651769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,128,1,float16,float16,0,0.06834133466084798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,0,1,float16,float16,0,0.06851733227570851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,128,1,float16,fp8,0,0.06851199766000111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,128,1,fp8,fp8,0,0.06264000137646993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,0,1,float16,fp8,0,0.06846400101979573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,64,0,1,fp8,fp8,0,0.06279466549555461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,128,1,float16,float16,0,0.06824000179767609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,0,1,float16,float16,0,0.06851733227570851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,128,1,float16,fp8,0,0.0684746652841568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,0,1,float16,fp8,0,0.06845333178838094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,0,1,fp8,fp8,0,0.06228266656398773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,128,1,float16,float16,0,0.04163199911514918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,0,0.04195733368396759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,128,1,float16,fp8,0,0.04377066592375437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,128,1,fp8,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,0,0.04257600009441376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,64,0,1,float16,float16,0,0.06843199829260509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,64,0,1,fp8,fp8,0,0.03949866692225138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,128,1,float16,float16,0,0.04193066557248434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,0,1,float16,float16,0,0.04177066683769226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,128,1,float16,fp8,0,0.04145599901676178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,128,1,fp8,fp8,0,0.039520000418027244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,0,1,float16,fp8,0,0.04201599955558777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,64,0,1,fp8,fp8,0,0.03995733211437861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,128,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,0,1,float16,float16,0,0.04163199911514918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,128,1,float16,fp8,0,0.041877334316571556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,128,1,fp8,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,0,1,float16,fp8,0,0.04190400242805481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,64,0,1,fp8,fp8,0,0.03957333415746689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,128,1,float16,float16,0,0.04176533222198486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,0,1,float16,float16,0,0.04182399809360504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,128,1,float16,fp8,0,0.0415786678592364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,128,1,fp8,fp8,0,0.03978666663169861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,0,1,float16,fp8,0,0.04160533348719279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,64,0,1,fp8,fp8,0,0.03991466760635376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,128,1,float16,float16,0,0.041573333243529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,0,1,float16,float16,0,0.041706666350364685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,128,1,float16,fp8,0,0.04260266820589701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,128,1,fp8,fp8,0,0.039919999738534294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,0,1,float16,fp8,0,0.04308266441027323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,64,0,1,fp8,fp8,0,0.0395359992980957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,128,1,float16,float16,0,0.027903998891512554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,0,0.027952000498771667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,128,1,float16,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,128,1,fp8,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,64,0,1,fp8,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,128,1,float16,float16,0,0.02922666569550832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,64,128,1,fp8,fp8,0,0.06412266691525777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,128,1,float16,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,128,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,0,1,float16,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,0,1,fp8,fp8,0,0.02769600103298823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,128,1,float16,float16,0,0.027658666173617046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,0,1,float16,float16,0,0.029461334149042766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,128,1,float16,fp8,0,0.028954667349656422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,128,1,fp8,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,0,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,64,0,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,64,128,1,fp8,fp8,0,0.06408533453941345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,128,1,float16,float16,0,0.02754666656255722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,0,1,float16,float16,0,0.029685333371162415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,128,1,float16,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,128,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,0,1,float16,fp8,0,0.029135999580224354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,64,0,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,128,1,float16,float16,0,0.027829334139823914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,0,1,float16,float16,0,0.029194665451844532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,128,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,128,1,fp8,fp8,0,0.027072000006834667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,0,1,float16,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,64,0,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,128,1,float16,float16,0,0.020938667158285778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,0,0.021935999393463135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,64,0,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,128,1,float16,float16,0,0.02179733415444692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,0,1,float16,float16,0,0.02298133323589961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,128,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,64,0,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,0,1,fp8,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,128,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,0,1,float16,float16,0,0.021295999487241108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,128,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,64,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,128,1,float16,float16,0,0.021295999487241108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,128,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,128,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,0,1,float16,float16,0,0.021456000705560047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,128,1,fp8,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,0,1,float16,fp8,0,0.0229120006163915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,64,0,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,128,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,0,0.018672000616788864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,128,1,float16,fp8,0,0.018794666975736618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,128,1,fp8,fp8,0,0.017984000345071156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,64,0,1,fp8,fp8,0,0.018458666900793713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,128,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,0,1,float16,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,64,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,128,1,float16,float16,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,128,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,0,1,float16,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,64,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,128,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,128,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,128,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,64,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,128,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,0,1,float16,float16,0,0.01882133384545644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,128,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,64,0,1,fp8,fp8,0,0.021594665944576263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,64,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,128,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,128,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,128,1,fp8,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,64,0,1,fp8,fp8,0,0.016634666671355564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,64,0,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,128,1,fp8,fp8,0,0.016271999726692837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,0,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,64,0,1,fp8,fp8,0,0.016095999628305435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,64,0,1,float16,float16,0,0.01590399940808614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,128,1,fp8,fp8,0,0.016127999871969223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,128,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,128,1,fp8,fp8,0,0.014762666076421738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,0,1,float16,fp8,0,0.0163680004576842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,128,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,0,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,128,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,0,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,64,0,1,fp8,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,float16,0,0.8657120068868002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,fp8,0,0.878544012705485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,128,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,128,1,fp8,fp8,0,0.8041813373565674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,float16,0,5.54098637898763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,64,128,1,float16,fp8,0,0.01623999948302905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,float16,0,0.8903679847717285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,fp8,0,0.8955360253651937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,fp8,0,5.55400021870931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,0,1,fp8,fp8,0,5.0549014409383135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,128,1,fp8,fp8,0,0.8228480021158854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,float16,0,0.904250701268514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,float16,0,5.5589173634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,fp8,0,0.9112586975097656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,0,1,fp8,fp8,0,5.083951950073242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,fp8,0,5.575482686360677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,128,1,fp8,fp8,0,0.8585333029429117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,float16,0,0.9351413249969482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,float16,0,5.577920277913411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,fp8,0,0.9432266553243002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,128,1,fp8,fp8,0,0.9010506470998129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,fp8,0,5.593178431193034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,float16,0,5.611738840738933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,0,1,fp8,fp8,0,5.1038665771484375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,float16,0,0.5196533203125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,fp8,0,0.531823992729187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,0,1,fp8,fp8,0,5.141994794209798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,128,1,fp8,fp8,0,0.4983359972635905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,float16,0,2.9229440689086914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,float16,0,0.4799040158589681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,0,1,fp8,fp8,0,2.6719465255737305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,fp8,0,2.9348745346069336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,fp8,0,5.623658498128255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,fp8,0,0.46564265092213947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,float16,0,2.85427188873291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,float16,0,0.46667198340098065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,fp8,0,2.853642781575521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,0,1,fp8,fp8,0,2.6038026809692383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,128,1,fp8,fp8,0,0.42905600865681964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,fp8,0,0.486682653427124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,128,1,fp8,fp8,0,0.43480531374613446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,float16,0,2.856351852416992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,float16,0,0.47412268320719403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,fp8,0,2.86083189646403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,0,1,fp8,fp8,0,2.6149439811706543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,fp8,0,0.48006399472554523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,128,1,fp8,fp8,0,0.44287999471028644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,float16,0,2.864410718282064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,float16,0,0.4878133138020833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,fp8,0,2.870181401570638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,fp8,0,0.49560534954071045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,128,1,fp8,fp8,0,0.4617066780726115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,float16,0,2.8808107376098633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,float16,0,0.2920959989229838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,0,1,fp8,fp8,0,2.624021371205648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,fp8,0,2.8951307932535806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,fp8,0,0.29788800080617267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,float16,0,1.5437547365824382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,128,1,fp8,fp8,0,0.29397332668304443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,float16,0,0.2633066574732463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,fp8,0,1.5497120221455891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,0,1,fp8,fp8,0,1.4187199274698894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,fp8,0,0.26311467091242474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,128,1,fp8,fp8,0,0.24709333976109824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,0,1,fp8,fp8,0,2.6351253191630044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,float16,0,0.26506133874257404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,fp8,0,1.5101440747578938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,fp8,0,0.2670186758041382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,float16,0,1.5112266540527344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,128,1,fp8,fp8,0,0.25012799104054767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,float16,0,1.5085387229919434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,float16,0,0.26972800493240356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,fp8,0,1.5133813222249348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,0,1,fp8,fp8,0,1.3835679690043132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,fp8,0,0.2738719979921977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,128,1,fp8,fp8,0,0.2564000089963277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,float16,0,1.5144000053405762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,float16,0,0.2765386700630188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,fp8,0,1.5211787223815918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,0,1,fp8,fp8,0,1.389909267425537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,fp8,0,0.2809973359107971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,128,1,fp8,fp8,0,0.26315732796986896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,0,1,fp8,fp8,0,1.3843092918395996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,float16,0,1.5247146288553874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,float16,0,0.2076853315035502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,fp8,0,0.20974934101104736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,fp8,0,1.5308747291564941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,float16,0,0.8890453179677328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,128,1,fp8,fp8,0,0.19881065686543783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,float16,0,0.20566932360331217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,0,1,fp8,fp8,0,0.8289066950480143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,fp8,0,0.20562666654586792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,float16,0,0.8843200206756592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,128,1,fp8,fp8,0,0.19595199823379517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,0,1,fp8,fp8,0,1.397871971130371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,fp8,0,0.8840586344401041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,0,1,fp8,fp8,0,0.8144533634185791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,fp8,0,0.8896479606628418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,fp8,0,0.20572799444198608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,float16,0,0.8829813003540039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,128,1,fp8,fp8,0,0.19537067413330078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,float16,0,0.20595200856526694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,0,1,fp8,fp8,0,0.8123093446095785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,fp8,0,0.20559465885162354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,float16,0,0.8846506277720133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,float16,0,0.20368534326553345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,128,1,fp8,fp8,0,0.19770665963490805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,float16,0,0.20777066548665366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,fp8,0,0.8853387037913004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,0,1,fp8,fp8,0,0.8131253719329834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,fp8,0,0.20968000094095865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,128,1,fp8,fp8,0,0.1965173284212748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,float16,0,0.8870453039805094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,fp8,0,0.8859253724416097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,float16,0,0.6709653536478678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,fp8,0,0.6618186632792155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,fp8,0,0.8850080172220866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,128,1,fp8,fp8,0,0.620416005452474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,0,1,fp8,fp8,0,0.8149173259735107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,float16,0,0.6625973383585612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,0,1,fp8,fp8,0,2.9878772099812827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,fp8,0,0.6705493132273356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,float16,0,3.274709383646647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,128,1,fp8,fp8,0,0.6289973258972168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,float16,0,3.2917067209879556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,fp8,0,3.287013371785482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,float16,0,0.686410665512085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,fp8,0,0.6886879603068033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,0,1,fp8,fp8,0,2.9990720748901367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,128,1,fp8,fp8,0,0.6302666664123535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,float16,0,3.3051573435465493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,float16,0,0.696943998336792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,fp8,0,3.3046239217122397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,0,1,fp8,fp8,0,3.0212532679239907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,fp8,0,0.7039679686228434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,128,1,fp8,fp8,0,0.6707839965820312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,fp8,0,3.290138562520345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,fp8,0,3.332559903462728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,float16,0,0.39506133397420246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,0,1,fp8,fp8,0,3.0427894592285156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,fp8,0,0.41330134868621826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,float16,0,1.7514932950337727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,float16,0,3.330906550089518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,fp8,0,1.7595787048339844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,float16,0,0.35122132301330566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,0,1,fp8,fp8,0,1.6064586639404297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,fp8,0,0.3550560077031453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,128,1,fp8,fp8,0,0.3293066620826721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,float16,0,1.701317310333252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,float16,0,0.35501333077748615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,fp8,0,1.7041385968526204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,0,1,fp8,fp8,0,1.5566986401875813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,fp8,0,0.35900266965230304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,128,1,fp8,fp8,0,0.3328266739845276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,128,1,fp8,fp8,0,0.37856535116831463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,float16,0,0.36103467146555585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,fp8,0,1.707632064819336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,fp8,0,0.36653868357340497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,float16,0,1.7118879954020183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,float16,0,1.7055786450703938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,0,1,fp8,fp8,0,1.5601226488749187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,0,1,fp8,fp8,0,1.5689759254455566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,float16,0,0.3705759843190511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,128,1,fp8,fp8,0,0.34893866380055744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,fp8,0,0.37715200583140057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,128,1,fp8,fp8,0,0.3508853514989217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,float16,0,1.72489595413208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,float16,0,0.2241386572519938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,fp8,0,1.7173813184102376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,fp8,0,1.7343254089355469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,fp8,0,0.23059199253718057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,128,1,fp8,fp8,0,0.22216532627741495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,float16,0,0.9414453506469727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,float16,0,0.20169599850972494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,fp8,0,0.9456533590952555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,0,1,fp8,fp8,0,0.8698986371358236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,fp8,0,0.20406933625539145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,128,1,fp8,fp8,0,0.19138665994008383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,float16,0,0.9153333504994711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,float16,0,0.20245865980784097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,fp8,0,0.9154346783955892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,0,1,fp8,fp8,0,1.5808960596720378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,fp8,0,0.2036799987157186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,128,1,fp8,fp8,0,0.19344000021616617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,float16,0,0.9181386629740397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,float16,0,0.20650132497151694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,fp8,0,0.9189759890238444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,0,1,fp8,fp8,0,0.8451946576436361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,fp8,0,0.20972265799840292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,128,1,fp8,fp8,0,0.19815999269485474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,float16,0,0.9213439623514811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,0,1,fp8,fp8,0,0.8406133651733398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,float16,0,0.21249600251515707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,fp8,0,0.92413330078125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,fp8,0,0.21632534265518188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,128,1,fp8,fp8,0,0.2053920030593872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,float16,0,0.9287839730580648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,float16,0,0.16261333227157593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,fp8,0,0.932639996210734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,0,1,fp8,fp8,0,0.8546720345815023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,fp8,0,0.16328533490498862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,float16,0,0.5663093328475952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,fp8,0,0.5615520079930624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,0,1,fp8,fp8,0,0.8479146957397461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,float16,0,0.16023466984430948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,float16,0,0.5573013226191202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,128,1,fp8,fp8,0,0.1525973379611969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,128,1,fp8,fp8,0,0.1539520025253296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,0,1,fp8,fp8,0,0.5158079862594604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,float16,0,0.16062399744987488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,fp8,0,0.16008533040682474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,float16,0,0.5559466679890951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,fp8,0,0.1607360045115153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,128,1,fp8,fp8,0,0.15053332845369974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,fp8,0,0.5571093161900839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,0,1,fp8,fp8,0,0.512725313504537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,float16,0,0.1606666644414266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,0,1,fp8,fp8,0,0.5130293369293213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,fp8,0,0.1607093314329783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,float16,0,0.5573386748631796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,fp8,0,0.5565439860026041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,0,1,fp8,fp8,0,0.5130186478296915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,float16,0,0.1607146660486857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,float16,0,0.5598613421122233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,fp8,0,0.5581706762313843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,128,1,fp8,fp8,0,0.15240533153216043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,fp8,0,0.5591040054957072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,0,1,fp8,fp8,0,0.5130240122477213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,float16,0,0.5758080085118612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,fp8,0,0.5508480072021484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,fp8,0,0.1623093287150065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,128,1,fp8,fp8,0,0.5045973459879557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,128,1,fp8,fp8,0,0.15227199594179788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,float16,0,0.5543306668599447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,0,1,fp8,fp8,0,2.156485398610433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,fp8,0,0.5586133400599161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,float16,0,2.369994640350342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,fp8,0,2.364581267038981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,float16,0,2.364837328592936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,fp8,0,2.380138715108236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,128,1,fp8,fp8,0,0.512661337852478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,float16,0,0.5832693179448446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,0,1,fp8,fp8,0,2.1670400301615396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,fp8,0,0.568341334660848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,128,1,fp8,fp8,0,0.5250453154246012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,float16,0,2.3809812863667807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,float16,0,0.5798773368199667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,fp8,0,2.3910346031188965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,128,1,fp8,fp8,0,0.5649439891179403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,float16,0,2.4008960723876953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,0,1,fp8,fp8,0,2.173583984375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,fp8,0,2.4135360717773438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,0,1,fp8,fp8,0,2.1990559895833335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,float16,0,0.3285653392473857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,fp8,0,0.33805867036183673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,128,1,fp8,fp8,0,0.3163573344548543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,float16,0,1.2789759635925293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,float16,0,0.291594664255778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,fp8,0,1.285365343093872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,0,1,fp8,fp8,0,1.173141320546468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,fp8,0,0.5877600113550822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,128,1,fp8,fp8,0,0.2742026646931966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,float16,0,1.2370453675587971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,float16,0,0.2958986759185791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,fp8,0,1.2371520201365154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,fp8,0,0.31523199876149494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,float16,0,1.2382453282674153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,fp8,0,0.2943626642227173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,fp8,0,1.243338664372762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,0,1,fp8,fp8,0,1.133840004603068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,float16,0,0.31083200375239056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,fp8,0,0.30523733297983807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,128,1,fp8,fp8,0,0.2794826626777649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,128,1,fp8,fp8,0,0.2867306669553121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,fp8,0,1.249125321706136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,0,1,fp8,fp8,0,1.1387413342793782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,0,1,fp8,fp8,0,1.1321706771850586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,float16,0,0.30980799595514935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,fp8,0,0.3142293294270833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,128,1,fp8,fp8,0,0.2932426730791728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,float16,0,1.2561759948730469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,float16,0,0.18895999590555826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,fp8,0,1.2586080233256023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,0,1,fp8,fp8,0,1.1496799786885579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,fp8,0,0.1925706664721171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,128,1,fp8,fp8,0,0.18945066134134927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,fp8,0,0.7128960291544596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,0,1,fp8,fp8,0,0.6433706680933634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,float16,0,0.1683893402417501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,fp8,0,0.16907199223836264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,128,1,fp8,fp8,0,0.15870933731396994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,float16,0,0.6957813103993734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,fp8,0,0.6723519961039225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,0,1,fp8,fp8,0,0.6195786794026693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,float16,0,1.2450239658355713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,float16,0,0.6748106479644775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,float16,0,0.6736053625742594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,128,1,fp8,fp8,0,0.16267200311024985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,fp8,0,0.674453337987264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,0,1,fp8,fp8,0,0.6221813360850016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,float16,0,0.17283733685811362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,float16,0,0.17046932379404703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,float16,0,0.6930987040201823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,128,1,fp8,fp8,0,0.16661333044370016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,fp8,0,0.1707520087560018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,0,1,fp8,fp8,0,0.6256266832351685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,float16,0,0.17896533012390137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,fp8,0,0.18278400103251138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,128,1,fp8,fp8,0,0.17306667566299438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,fp8,0,0.1750613252321879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,fp8,0,0.6844106515248617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,0,1,fp8,fp8,0,0.6302719910939535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,fp8,0,0.6784693400065104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,fp8,0,0.1381653348604838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,float16,0,0.6829706827799479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,128,1,fp8,fp8,0,0.13319466511408487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,fp8,0,0.42285335063934326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,float16,0,0.1360479990641276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,float16,0,0.1383039951324463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,float16,0,0.42140265305836994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,float16,0,0.4240800142288208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,fp8,0,0.13607999682426453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,fp8,0,0.4225920041402181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,0,1,fp8,fp8,0,0.38834667205810547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,float16,0,0.1381226678689321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,0,1,fp8,fp8,0,0.3919946750005086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,fp8,0,0.13809067010879517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,128,1,fp8,fp8,0,0.12986133495966592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,fp8,0,0.42109866937001544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,float16,0,0.1360373298327128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,128,1,fp8,fp8,0,0.12877866625785828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,fp8,0,0.13615467151006064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,128,1,fp8,fp8,0,0.12980266412099203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,float16,0,0.4225173393885295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,fp8,0,0.42094401518503827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,0,1,fp8,fp8,0,0.3884426752726237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,float16,0,0.42244799931844074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,float16,0,0.42293866475423175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,fp8,0,0.13661332925160727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,0,1,fp8,fp8,0,0.38844799995422363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,fp8,0,0.42181865374247235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,0,1,fp8,fp8,0,0.3900159994761149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,float16,0,0.8526933193206787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,float16,0,0.1360213359196981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,fp8,0,0.857040007909139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,float16,0,3.119450569152832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,128,1,fp8,fp8,0,0.13024000326792398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,fp8,0,3.1268692016601562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,0,1,fp8,fp8,0,2.8412319819132485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,fp8,0,0.8743786811828613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,float16,0,3.1377439498901367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,float16,0,0.8662346998850504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,128,1,fp8,fp8,0,0.802623987197876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,float16,0,0.9087146917978922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,0,1,fp8,fp8,0,2.855968157450358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,128,1,fp8,fp8,0,0.7824052969614664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,fp8,0,0.8911680380503336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,128,1,fp8,fp8,0,0.8227252960205078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,float16,0,3.1559572219848633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,float16,0,0.9134666919708252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,fp8,0,3.1447200775146484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,0,1,fp8,fp8,0,2.876943906148275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,fp8,0,0.9209280014038086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,128,1,fp8,fp8,0,0.8583679993947347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,float16,0,3.190272013346354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,float16,0,0.49882133801778156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,fp8,0,3.1589600245157876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,fp8,0,0.5085066556930542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,float16,0,1.6670079231262207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,128,1,fp8,fp8,0,0.47444268067677814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,fp8,0,1.6769973436991374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,0,1,fp8,fp8,0,1.5260000228881836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,float16,0,0.4379093249638875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,fp8,0,0.44203734397888184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,float16,0,1.5970239639282227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,128,1,fp8,fp8,0,0.42258667945861816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,fp8,0,3.201317469278971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,fp8,0,1.6021706263224285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,float16,0,0.44369598229726154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,fp8,0,0.44892267386118573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,float16,0,1.6029225985209148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,128,1,fp8,fp8,0,0.4127039909362793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,0,1,fp8,fp8,0,1.4621334075927734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,fp8,0,1.6086986859639485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,0,1,fp8,fp8,0,1.4648799896240234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,float16,0,0.45185601711273193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,fp8,0,0.45631468296051025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,128,1,fp8,fp8,0,0.42213865121205646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,float16,0,1.6133707364400227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,float16,0,0.465776006380717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,fp8,0,1.6180213292439778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,fp8,0,0.4731733401616414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,float16,0,1.6282240549723308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,0,1,fp8,fp8,0,2.915712038675944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,fp8,0,1.6371787389119465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,float16,0,0.2679893374443054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,128,1,fp8,fp8,0,0.4516479969024658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,float16,0,0.8782239754994711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,fp8,0,0.27315733830134076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,128,1,fp8,fp8,0,0.2590986688931783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,0,1,fp8,fp8,0,1.4721652666727703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,fp8,0,0.8817973136901855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,0,1,fp8,fp8,0,0.8085707028706869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,float16,0,0.235807995001475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,fp8,0,0.2382026712099711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,128,1,fp8,fp8,0,0.22419732809066772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,0,1,fp8,fp8,0,1.4891626040140789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,fp8,0,0.843285322189331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,float16,0,0.8442613283793131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,fp8,0,0.2420639991760254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,float16,0,0.8411359786987305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,float16,0,0.23956799507141113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,fp8,0,0.8472853501637777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,float16,0,0.24624532461166382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,0,1,fp8,fp8,0,0.7768959999084473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,fp8,0,0.24714666604995728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,float16,0,0.8536640008290609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,128,1,fp8,fp8,0,0.23811733722686768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,fp8,0,0.8570346832275391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,0,1,fp8,fp8,0,0.7732799847920736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,float16,0,0.25150932868321735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,fp8,0,0.2550080021222432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,128,1,fp8,fp8,0,0.2471946676572164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,128,1,fp8,fp8,0,0.22849599520365396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,fp8,0,0.8618666330973307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,0,1,fp8,fp8,0,0.7817973295847574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,float16,0,0.48625067869822186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,float16,0,0.860912005106608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,fp8,0,0.1565546691417694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,128,1,fp8,fp8,0,0.15033599734306335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,fp8,0,0.48845334847768146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,0,1,fp8,fp8,0,0.4517173369725545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,float16,0,0.1361120045185089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,fp8,0,0.13703466455141702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,128,1,fp8,fp8,0,0.12770133217175803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,fp8,0,0.46795201301574707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,float16,0,0.46756800015767414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,0,1,fp8,fp8,0,0.42736534277598065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,float16,0,0.13593600193659464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,float16,0,0.46794132391611737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,fp8,0,0.13779733578364053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,128,1,fp8,fp8,0,0.12778133153915405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,fp8,0,0.46803200244903564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,0,1,fp8,fp8,0,0.42929065227508545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,float16,0,0.13870933651924133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,float16,0,0.4697333176930745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,128,1,fp8,fp8,0,0.13546133041381836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,float16,0,0.15261333187421164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,fp8,0,0.4724373420079549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,0,1,fp8,fp8,0,0.4352533419926961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,float16,0,0.14311466614405313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,fp8,0,0.14433599511782327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,128,1,fp8,fp8,0,0.14037332932154337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,fp8,0,0.13988266388575235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,fp8,0,0.4764106671015422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,0,1,fp8,fp8,0,0.44096000989278156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,float16,0,0.11157332857449849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,float16,0,0.30409600337346393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,128,1,fp8,fp8,0,0.10880000392595927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,float16,0,0.47464001178741455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,fp8,0,0.3041173418362935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,0,1,fp8,fp8,0,0.28169600168863934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,float16,0,0.3037066658337911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,0,1,fp8,fp8,0,0.7909386952718099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,128,1,fp8,fp8,0,0.10736533006032307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,fp8,0,0.30269332726796466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,0,1,fp8,fp8,0,0.28143467505772907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,float16,0,0.11170666416486104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,float16,0,0.30210665861765545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,fp8,0,0.11145066221555074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,128,1,fp8,fp8,0,0.10703999797503154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,fp8,0,0.30347200234731037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,0,1,fp8,fp8,0,0.27956799666086835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,float16,0,0.11301333705584209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,float16,0,0.30404800176620483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,fp8,0,0.11154666543006897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,128,1,fp8,fp8,0,0.10734400153160095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,fp8,0,0.3039360046386719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,0,1,fp8,fp8,0,0.2810666759808858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,float16,0,0.11128532886505127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,float16,0,0.30215466022491455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,fp8,0,0.11156800389289856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,fp8,0,0.11166399717330933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,fp8,0,0.30460800727208454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,0,1,fp8,fp8,0,0.28147733211517334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,float16,0,0.11148266990979512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,float16,0,0.6391893227895101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,fp8,0,0.11156266927719116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,128,1,fp8,fp8,0,0.10691733161608379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,128,1,fp8,fp8,0,0.5865973234176636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,fp8,0,1.8955519994099934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,0,1,fp8,fp8,0,1.7196106910705566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,float16,0,0.6786346435546875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,fp8,0,0.644048015276591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,float16,0,1.8920213381449382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,fp8,0,0.6532959938049316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,float16,0,1.8999999364217122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,float16,0,0.6606613397598267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,fp8,0,1.906335989634196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,0,1,fp8,fp8,0,1.7335093816121419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,fp8,0,0.6671413580576578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,128,1,fp8,fp8,0,0.6467573245366415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,128,1,fp8,fp8,0,0.5991146564483643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,float16,0,1.9175732930501301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,float16,0,0.682207981745402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,fp8,0,1.922335942586263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,fp8,0,0.6901600360870361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,float16,0,1.9411999384562175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,128,1,fp8,fp8,0,0.6401333411534628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,float16,0,0.3792213201522827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,fp8,0,1.9451467196146648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,float16,0,1.0274826685587566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,0,1,fp8,fp8,0,1.749743938446045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,128,1,fp8,fp8,0,0.3609120051066081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,fp8,0,1.034272034962972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,0,1,fp8,fp8,0,0.9473280111948649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,float16,0,0.3331200083096822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,0,1,fp8,fp8,0,1.7806827227274578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,float16,0,0.9797226587931315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,fp8,0,0.3980213403701782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,fp8,0,0.9821440378824869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,0,1,fp8,fp8,0,0.8956426779429117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,float16,0,0.339466651280721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,fp8,0,0.34086398283640545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,fp8,0,0.3360639810562134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,128,1,fp8,fp8,0,0.3251360058784485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,128,1,fp8,fp8,0,0.31099732716878253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,fp8,0,0.9849973519643148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,0,1,fp8,fp8,0,0.9009119669596354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,fp8,0,0.35020800431569415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,float16,0,0.9919466972351074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,128,1,fp8,fp8,0,0.3283093372980754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,fp8,0,0.9945173263549805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,0,1,fp8,fp8,0,0.9074880282084147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,float16,0,0.3614879846572876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,fp8,0,0.3596266508102417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,float16,0,1.00218669573466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,128,1,fp8,fp8,0,0.33477334181467694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,float16,0,0.9825546741485596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,fp8,0,1.0087040265401204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,float16,0,0.34506134192148846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,float16,0,0.2075786590576172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,fp8,0,0.21195733547210693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,128,1,fp8,fp8,0,0.20175999402999878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,fp8,0,0.5550560156504313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,0,1,fp8,fp8,0,0.9188106854756674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,0,1,fp8,fp8,0,0.5112213293711344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,float16,0,0.18153599898020426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,float16,0,0.5208426713943481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,128,1,fp8,fp8,0,0.17292267084121704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,fp8,0,0.523914655049642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,0,1,fp8,fp8,0,0.4843466679255168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,float16,0,0.18224533398946127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,float16,0,0.5235253175099691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,fp8,0,0.19660800695419312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,128,1,fp8,fp8,0,0.17484800020853677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,fp8,0,0.5260159969329834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,fp8,0,0.1827039917310079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,0,1,fp8,fp8,0,0.49747733275095624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,float16,0,0.18728532393773398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,fp8,0,0.1904426614443461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,float16,0,0.529210646947225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,128,1,fp8,fp8,0,0.1916853388150533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,fp8,0,0.5333866675694784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,0,1,fp8,fp8,0,0.4904319842656453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,fp8,0,0.1973386605580648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,float16,0,0.5376960039138794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,128,1,fp8,fp8,0,0.1856373349825541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,fp8,0,0.541370670000712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,float16,0,0.5497973362604777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,0,1,fp8,fp8,0,0.49696000417073566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,fp8,0,0.12341333429018657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,128,1,fp8,fp8,0,0.11854400237401326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,float16,0,0.19420266151428223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,fp8,0,0.3132479985555013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,0,1,fp8,fp8,0,0.291429340839386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,float16,0,0.29833600918451947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,fp8,0,0.10964266459147136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,float16,0,0.12007466952006023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,float16,0,0.3102239966392517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,fp8,0,0.29812800884246826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,0,1,fp8,fp8,0,0.2712373336156209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,float16,0,0.2982719937960307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,float16,0,0.10843200484911601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,fp8,0,0.10948266585667928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,128,1,fp8,fp8,0,0.101200004418691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,fp8,0,0.29872532685597736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,0,1,fp8,fp8,0,0.27353066205978394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,float16,0,0.10969600081443787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,128,1,fp8,fp8,0,0.10140267014503479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,float16,0,0.30010666449864704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,128,1,fp8,fp8,0,0.10371200243631999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,fp8,0,0.3002293308575948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,float16,0,0.10885866483052571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,float16,0,0.11224533120791118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,float16,0,0.30187733968098956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,128,1,fp8,fp8,0,0.10964799920717876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,fp8,0,0.31013866265614826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,0,1,fp8,fp8,0,0.2835093339284261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,fp8,0,0.10962667067845662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,float16,0,0.08737599849700928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,float16,0,0.20161600907643637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,0,1,fp8,fp8,0,0.2753866712252299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,128,1,fp8,fp8,0,0.08283199866612752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,fp8,0,0.20165866613388062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,0,1,fp8,fp8,0,0.18705600500106812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,float16,0,0.08711466193199158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,float16,0,0.20176533857981363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,fp8,0,0.08750399947166443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,128,1,fp8,fp8,0,0.08281066517035167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,fp8,0,0.20205867290496826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,0,1,fp8,fp8,0,0.18731200695037842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,float16,0,0.08703999718030293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,float16,0,0.201690673828125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,fp8,0,0.08705600102742513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,fp8,0,0.08693333466847737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,fp8,0,0.2018400033315023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,0,1,fp8,fp8,0,0.18786666790644327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,float16,0,0.08770133058230083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,fp8,0,0.11373866597811381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,float16,0,0.20358934005101523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,128,1,fp8,fp8,0,0.08257066706816356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,fp8,0,0.20359466473261514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,0,1,fp8,fp8,0,0.18709866205851236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,float16,0,0.08687999844551086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,float16,0,0.20225600401560465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,128,1,fp8,fp8,0,0.08477333188056946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,fp8,0,0.20323199033737183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,0,1,fp8,fp8,0,0.1874720056851705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,fp8,0,0.0881813367207845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,float16,0,0.8436906337738037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,fp8,0,0.08701866865158081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,fp8,0,0.850752035776774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,float16,0,1.8927946090698242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,128,1,fp8,fp8,0,0.7726346651713053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,float16,0,0.862501303354899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,fp8,0,1.9007360140482585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,0,1,fp8,fp8,0,1.717626730600993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,fp8,0,0.8664106527964274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,128,1,fp8,fp8,0,0.08275733391443889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,float16,0,1.9073546727498372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,128,1,fp8,fp8,0,0.7958827018737793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,fp8,0,1.9140480359395344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,float16,0,0.8780213197072347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,0,1,fp8,fp8,0,1.7405279477437336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,fp8,0,0.8832480112711588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,128,1,fp8,fp8,0,0.8120853106180826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,float16,0,1.9322932561238606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,float16,0,0.9306080341339111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,fp8,0,0.9140053590138754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,float16,0,1.9599199295043945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,128,1,fp8,fp8,0,0.8468960126241049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,fp8,0,1.9337813059488933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,fp8,0,1.9674720764160156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,float16,0,0.4899093310038249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,0,1,fp8,fp8,0,1.7978453636169434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,fp8,0,0.49604801336924237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,float16,0,1.0293653011322021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,fp8,0,1.0368213653564453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,0,1,fp8,fp8,0,0.9519360065460205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,0,1,fp8,fp8,0,1.757530689239502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,float16,0,0.4299413363138835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,fp8,0,0.43301331996917725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,float16,0,0.9670506318410238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,128,1,fp8,fp8,0,0.46991999944051105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,fp8,0,0.9691253503163656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,0,1,fp8,fp8,0,0.8820586999257406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,float16,0,0.4344160159428914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,fp8,0,0.44021864732106525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,float16,0,0.9737280209859213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,fp8,0,0.9773973623911539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,128,1,fp8,fp8,0,0.41786134243011475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,float16,0,0.442469318707784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,float16,0,0.9790399869283041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,fp8,0,0.44722668329874676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,128,1,fp8,fp8,0,0.43389864762624103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,128,1,fp8,fp8,0,0.4024159908294678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,fp8,0,0.9861439863840739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,float16,0,0.456005334854126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,0,1,fp8,fp8,0,0.8956159750620524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,0,1,fp8,fp8,0,0.9029013315836588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,fp8,0,0.4626293182373047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,128,1,fp8,fp8,0,0.42769066492716473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,float16,0,0.262661337852478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,0,1,fp8,fp8,0,0.9135626951853434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,float16,0,0.539242664972941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,fp8,0,0.2630560000737508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,128,1,fp8,fp8,0,0.24803733825683594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,fp8,0,0.5453226566314697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,0,1,fp8,fp8,0,0.5130133231480917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,float16,0,0.22380266586939493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,float16,0,0.9970080057779948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,fp8,0,0.2266826629638672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,128,1,fp8,fp8,0,0.21210134029388428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,fp8,0,1.0026133060455322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,fp8,0,0.5083786646525065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,float16,0,0.5092106660207113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,fp8,0,0.2312426765759786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,128,1,fp8,fp8,0,0.21596266825993857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,float16,0,0.504746675491333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,fp8,0,0.5109493335088094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,float16,0,0.23261332511901855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,float16,0,0.5156373182932535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,fp8,0,0.23654399315516153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,128,1,fp8,fp8,0,0.22091732422510782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,fp8,0,0.5220533212025961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,0,1,fp8,fp8,0,0.4703893264134725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,0,1,fp8,fp8,0,0.4761546850204468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,float16,0,0.24080000321070352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,float16,0,0.5228480100631714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,fp8,0,0.2445759971936544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,128,1,fp8,fp8,0,0.22831465800603232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,fp8,0,0.5279093186060587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,0,1,fp8,fp8,0,0.48401065667470294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,float16,0,0.29652265707651776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,float16,0,0.2278239925702413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,128,1,fp8,fp8,0,0.13795733451843262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,fp8,0,0.29900266726811725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,0,1,fp8,fp8,0,0.27826132376988727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,float16,0,0.12062399586041768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,float16,0,0.14053866267204285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,float16,0,0.2749600013097127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,128,1,fp8,fp8,0,0.11332266529401143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,0,1,fp8,fp8,0,0.4676106770833333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,fp8,0,0.27614400784174603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,0,1,fp8,fp8,0,0.2542293270428975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,float16,0,0.12004266182581584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,float16,0,0.2756906747817993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,fp8,0,0.12380266189575195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,128,1,fp8,fp8,0,0.11750400066375732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,fp8,0,0.27848533789316815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,0,1,fp8,fp8,0,0.25700799624125165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,float16,0,0.12566933035850525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,float16,0,0.27955732742945355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,128,1,fp8,fp8,0,0.12370133399963379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,fp8,0,0.2813599904378255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,fp8,0,0.12116266290346782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,fp8,0,0.14430933197339377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,float16,0,0.13005333145459494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,float16,0,0.2856053312619527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,fp8,0,0.13198933005332947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,128,1,fp8,fp8,0,0.12930132945378622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,fp8,0,0.28832000494003296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,fp8,0,0.12755200266838074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,0,1,fp8,fp8,0,0.2690826654434204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,float16,0,0.17255999644597372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,fp8,0,0.08544533451398213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,fp8,0,0.17413866519927979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,0,1,fp8,fp8,0,0.16571733355522156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,float16,0,0.07877866427103679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,float16,0,0.16750399271647134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,float16,0,0.08087466657161713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,128,1,fp8,fp8,0,0.07256533205509186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,128,1,fp8,fp8,0,0.08190399905045827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,0,1,fp8,fp8,0,0.15249600013097128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,float16,0,0.07680533329645793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,float16,0,0.1665493349234263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,fp8,0,0.07836266855398814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,128,1,fp8,fp8,0,0.07259200016657512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,0,1,fp8,fp8,0,0.2613919973373413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,fp8,0,0.16699733336766562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,0,1,fp8,fp8,0,0.15267200271288553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,float16,0,0.0764213353395462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,fp8,0,0.16701332728068033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,fp8,0,0.07865599791208903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,128,1,fp8,fp8,0,0.07252266506354015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,fp8,0,0.16896533966064453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,0,1,fp8,fp8,0,0.15467199683189392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,float16,0,0.07878399888674419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,float16,0,0.16850133736928305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,fp8,0,0.08080000181992848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,128,1,fp8,fp8,0,0.07478400071461995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,fp8,0,0.16892266273498535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,0,1,fp8,fp8,0,0.15648000439008078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,float16,0,0.06234133243560791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,float16,0,0.12380266189575195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,128,1,fp8,fp8,0,0.06032533446947733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,float16,0,0.16664000352223715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,fp8,0,0.12378666798273723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,0,1,fp8,fp8,0,0.11559999982515971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,float16,0,0.062368000547091164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,float16,0,0.1232373317082723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,fp8,0,0.062074666221936546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,128,1,fp8,fp8,0,0.06016000111897787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,fp8,0,0.12251733740170796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,0,1,fp8,fp8,0,0.1139306624730428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,float16,0,0.06217599908510844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,float16,0,0.12372266252835591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,fp8,0,0.06242666641871134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,128,1,fp8,fp8,0,0.060533334811528526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,fp8,0,0.06203199923038483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,0,1,fp8,fp8,0,0.11555199821790059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,float16,0,0.12342400352160136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,fp8,0,0.06262933214505513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,128,1,fp8,fp8,0,0.06041066845258077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,fp8,0,0.12342933813730876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,0,1,fp8,fp8,0,0.11541333794593811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,float16,0,0.06229866544405619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,float16,0,0.12193600336710612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,float16,0,0.06206400195757548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,128,1,fp8,fp8,0,0.06002133091290792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,fp8,0,0.1232373317082723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,0,1,fp8,fp8,0,0.11379733681678772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,float16,0,0.6374026536941528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,fp8,0,0.07708266874154408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,fp8,0,0.1237493356068929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,fp8,0,0.06234133243560791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,fp8,0,0.6396426757176717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,float16,0,1.1965386867523193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,128,1,fp8,fp8,0,0.5826666752497355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,float16,0,0.6466986735661825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,0,1,fp8,fp8,0,1.0867146650950115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,float16,0,1.2083946863810222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,fp8,0,0.6518239974975586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,128,1,fp8,fp8,0,0.5990879933039347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,fp8,0,1.2117280165354412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,0,1,fp8,fp8,0,1.1006453037261963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,fp8,0,1.199343999226888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,float16,0,1.2211466630299885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,128,1,fp8,fp8,0,0.6099199851353964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,fp8,0,1.226650635401408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,float16,0,0.661290685335795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,0,1,fp8,fp8,0,1.1158560117085774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,float16,0,0.6847360134124756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,fp8,0,0.6630986531575521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,float16,0,1.2452586491902669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,128,1,fp8,fp8,0,0.6372106472651163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,float16,0,0.37305064996083576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,fp8,0,1.2468533515930176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,0,1,fp8,fp8,0,1.1395359834035237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,float16,0,0.6638559897740682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,128,1,fp8,fp8,0,0.3749866485595703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,fp8,0,0.6705706914265951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,fp8,0,0.7026666800181071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,float16,0,0.32890133062998456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,float16,0,0.6150399843851725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,fp8,0,0.32980799674987793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,128,1,fp8,fp8,0,0.3039253354072571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,fp8,0,0.37878934542338055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,fp8,0,0.6173493464787801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,0,1,fp8,fp8,0,0.5640480120976766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,0,1,fp8,fp8,0,0.616207997004191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,float16,0,0.6205706596374512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,128,1,fp8,fp8,0,0.3089333375295003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,fp8,0,0.6260266701380411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,0,1,fp8,fp8,0,0.570192019144694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,fp8,0,0.33338133494059247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,float16,0,0.6289546489715576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,fp8,0,0.34200533231099445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,128,1,fp8,fp8,0,0.31594665845235187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,fp8,0,0.6312693357467651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,0,1,fp8,fp8,0,0.5779199997584025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,float16,0,0.35025068124135333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,float16,0,0.33901333808898926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,float16,0,0.6400479873021444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,fp8,0,0.35410133997599286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,128,1,fp8,fp8,0,0.326474666595459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,fp8,0,0.6434666713078817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,0,1,fp8,fp8,0,0.5882933139801025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,float16,0,0.3526080052057902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,128,1,fp8,fp8,0,0.1917333404223124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,float16,0,0.3309013247489929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,fp8,0,0.3581013282140096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,0,1,fp8,fp8,0,0.33083732922871906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,float16,0,0.1707680026690165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,float16,0,0.32386134068171185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,128,1,fp8,fp8,0,0.1645813286304474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,fp8,0,0.325162669022878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,0,1,fp8,fp8,0,0.302181343237559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,float16,0,0.17241599162419638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,float16,0,0.3267146746317546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,fp8,0,0.1725920041402181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,float16,0,0.19962133963902792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,fp8,0,0.32860267162323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,0,1,fp8,fp8,0,0.3042186697324117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,float16,0,0.17710934082667032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,float16,0,0.33134933312733966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,fp8,0,0.17512534062067667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,fp8,0,0.18078400691350302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,128,1,fp8,fp8,0,0.16548267006874084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,fp8,0,0.3330026666323344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,0,1,fp8,fp8,0,0.31014400720596313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,float16,0,0.18529599905014038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,float16,0,0.3404906590779622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,fp8,0,0.18773333231608072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,128,1,fp8,fp8,0,0.1783413290977478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,fp8,0,0.34084800879160565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,0,1,fp8,fp8,0,0.31723199288050336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,float16,0,0.19708265860875449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,128,1,fp8,fp8,0,0.17114132642745972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,128,1,fp8,fp8,0,0.10788800319035848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,fp8,0,0.1978293259938558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,0,1,fp8,fp8,0,0.18746666113535562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,float16,0,0.09712533156077068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,float16,0,0.18134933710098267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,fp8,0,0.09730133414268494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,fp8,0,0.11265599727630615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,fp8,0,0.2039626638094584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,0,1,fp8,fp8,0,0.16731733083724976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,float16,0,0.09669867157936096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,float16,0,0.1824480096499125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,float16,0,0.1109226644039154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,128,1,fp8,fp8,0,0.09097599983215332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,128,1,fp8,fp8,0,0.08889066179593404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,fp8,0,0.1829866568247477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,float16,0,0.09915199875831604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,float16,0,0.1834239959716797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,fp8,0,0.09981866677602132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,128,1,fp8,fp8,0,0.09327999750773112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,fp8,0,0.18509334325790405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,fp8,0,0.18354666233062744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,float16,0,0.10159466663996379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,float16,0,0.18594666322072348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,fp8,0,0.10345066587130229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,128,1,fp8,fp8,0,0.09990400075912476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,fp8,0,0.09818666179974873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,0,1,fp8,fp8,0,0.17965332667032877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,float16,0,0.0646666685740153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,float16,0,0.11595732967058818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,fp8,0,0.06628266473611195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,128,1,fp8,fp8,0,0.06444799900054932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,fp8,0,0.11830400427182515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,0,1,fp8,fp8,0,0.1120746632417043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,float16,0,0.06233066817124685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,float16,0,0.11441600322723389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,fp8,0,0.062309334675470986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,0,1,fp8,fp8,0,0.16778133312861124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,128,1,fp8,fp8,0,0.05819733440876007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,fp8,0,0.11338667074839275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,0,1,fp8,fp8,0,0.10531733433405559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,float16,0,0.06027733286221822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,float16,0,0.11422933141390483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,fp8,0,0.06137600044409434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,128,1,fp8,fp8,0,0.05861866474151611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,fp8,0,0.1143946647644043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,0,1,fp8,fp8,0,0.10526933272679646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,float16,0,0.06198933223883311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,float16,0,0.11397332946459453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,0,1,fp8,fp8,0,0.17054933309555054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,fp8,0,0.062277331948280334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,128,1,fp8,fp8,0,0.060362666845321655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,fp8,0,0.11570133765538533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,0,1,fp8,fp8,0,0.10728533069292705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,float16,0,0.062368000547091164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,float16,0,0.11588799953460693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,fp8,0,0.06438399851322174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,128,1,fp8,fp8,0,0.0621919979651769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,fp8,0,0.11656000216801961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,0,1,fp8,fp8,0,0.10757333040237427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,float16,0,0.05216533442338308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,float16,0,0.08525866270065308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,fp8,0,0.05208000044027964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,128,1,fp8,fp8,0,0.051632001996040344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,fp8,0,0.08496532837549846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,0,1,fp8,fp8,0,0.08092799782752991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,float16,0,0.05264533559481303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,float16,0,0.0862506628036499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,fp8,0,0.054085334142049156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,128,1,fp8,fp8,0,0.05012266834576925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,fp8,0,0.08660800258318584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,0,1,fp8,fp8,0,0.08061333497365315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,float16,0,0.0543146679798762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,float16,0,0.08617066343625386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,fp8,0,0.053770666321118675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,128,1,fp8,fp8,0,0.051914667089780174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,fp8,0,0.08679466446240743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,0,1,fp8,fp8,0,0.0806826651096344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,float16,0,0.054229333996772766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,fp8,0,0.05409066875775655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,128,1,fp8,fp8,0,0.04994666576385498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,fp8,0,0.086517333984375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,0,1,fp8,fp8,0,0.0803306649128596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,float16,0,0.05428266525268555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,float16,0,0.086709330479304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,fp8,0,0.05385600030422211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,128,1,fp8,fp8,0,0.051781331499417625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,fp8,0,0.08710400263468425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,0,1,fp8,fp8,0,0.08060266574223836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,fp8,0,0.18859734137852988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,float16,0,0.8512213230133057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,float16,0,0.08675199747085571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,float16,0,1.2838186422983806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,fp8,0,0.8495840231577555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,128,1,fp8,fp8,0,0.7629333337148031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,fp8,0,1.283903996149699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,float16,0,0.8669760227203369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,float16,0,1.3028799692789714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,fp8,0,0.867253303527832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,fp8,0,1.3021653493245442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,0,1,fp8,fp8,0,1.1702880064646404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,0,1,fp8,fp8,0,1.1544373035430908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,float16,0,0.9045173327128092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,float16,0,1.3227787017822266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,128,1,fp8,fp8,0,0.7775039672851562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,fp8,0,0.8839840094248453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,128,1,fp8,fp8,0,0.7963786919911703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,fp8,0,1.3199199835459392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,float16,0,1.346560001373291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,fp8,0,0.922106663386027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,128,1,fp8,fp8,0,0.8338399728139242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,fp8,0,1.3480106989542644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,0,1,fp8,fp8,0,1.2277440230051677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,float16,0,0.48826666673024494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,float16,0,0.7192266782124838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,fp8,0,0.49517865975697833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,128,1,fp8,fp8,0,0.4641653299331665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,fp8,0,0.722378651301066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,0,1,fp8,fp8,0,0.6768319606781006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,float16,0,0.4233013391494751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,float16,0,0.9067200024922689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,float16,0,0.6503200133641561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,128,1,fp8,fp8,0,0.39075199762980145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,fp8,0,0.6530773242314657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,0,1,fp8,fp8,0,0.593509316444397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,float16,0,0.4337173302968343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,float16,0,0.6566933393478394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,fp8,0,0.4370933373769124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,0,1,fp8,fp8,0,1.1877280076344807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,fp8,0,0.4281653165817261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,fp8,0,0.66102401415507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,0,1,fp8,fp8,0,0.6015679836273193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,float16,0,0.44021864732106525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,float16,0,0.6663839817047119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,128,1,fp8,fp8,0,0.40721599260965985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,fp8,0,0.6707733472188314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,0,1,fp8,fp8,0,0.6099253495534261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,128,1,fp8,fp8,0,0.39879465103149414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,float16,0,0.45557332038879395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,float16,0,0.6791360378265381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,fp8,0,0.45876801013946533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,fp8,0,0.683626651763916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,float16,0,0.25389333566029865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,float16,0,0.373418649037679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,fp8,0,0.25944000482559204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,128,1,fp8,fp8,0,0.24485333760579428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,fp8,0,0.3781013488769531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,fp8,0,0.4456160068511963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,0,1,fp8,fp8,0,0.34940266609191895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,float16,0,0.21965867280960083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,float16,0,0.33693333466847736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,128,1,fp8,fp8,0,0.42501866817474365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,128,1,fp8,fp8,0,0.2069759964942932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,0,1,fp8,fp8,0,0.6256800095240275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,fp8,0,0.3391520182291667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,0,1,fp8,fp8,0,0.3133920033772786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,float16,0,0.22220265865325928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,float16,0,0.3409973382949829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,fp8,0,0.22463999191919962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,128,1,fp8,fp8,0,0.21112000942230225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,fp8,0,0.3439893325169881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,0,1,fp8,fp8,0,0.31593600908915204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,float16,0,0.23020267486572266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,float16,0,0.34694401423136395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,fp8,0,0.23189334074656168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,128,1,fp8,fp8,0,0.21581866343816122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,0,1,fp8,fp8,0,0.32238932450612384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,float16,0,0.2371573249499003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,float16,0,0.3563733498255412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,128,1,fp8,fp8,0,0.22447999318440756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,fp8,0,0.22107734282811484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,fp8,0,0.35972265402475995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,0,1,fp8,fp8,0,0.3285920023918152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,float16,0,0.13611732920010886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,float16,0,0.2026240030924479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,128,1,fp8,fp8,0,0.13397866487503052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,fp8,0,0.20548800627390543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,fp8,0,0.24075200160344443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,0,1,fp8,fp8,0,0.19241599241892496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,float16,0,0.11558399597803752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,float16,0,0.1809813380241394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,fp8,0,0.11622933546702068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,128,1,fp8,fp8,0,0.1074133316675822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,fp8,0,0.18149334192276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,0,1,fp8,fp8,0,0.16612799962361655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,float16,0,0.11548266808191936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,fp8,0,0.14305599530537924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,fp8,0,0.1176479955514272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,128,1,fp8,fp8,0,0.11131200194358826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,fp8,0,0.18333866198857626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,0,1,fp8,fp8,0,0.1691733400026957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,float16,0,0.11964266498883565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,float16,0,0.18347734212875366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,fp8,0,0.12065600355466206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,128,1,fp8,fp8,0,0.11745599905649821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,fp8,0,0.18555732568105063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,0,1,fp8,fp8,0,0.17707733313242593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,float16,0,0.12430399656295776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,float16,0,0.19163199265797934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,fp8,0,0.12688533465067545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,128,1,fp8,fp8,0,0.12395733594894409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,fp8,0,0.19341866175333658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,fp8,0,0.3500959873199463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,float16,0,0.07691200077533722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,float16,0,0.11363200346628825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,128,1,fp8,fp8,0,0.07738133271535237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,fp8,0,0.11522666613260905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,0,1,fp8,fp8,0,0.11308800180753072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,float16,0,0.07107200225194295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,float16,0,0.10752000411351521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,fp8,0,0.07231999933719635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,128,1,fp8,fp8,0,0.06618666648864746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,fp8,0,0.0786186655362447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,fp8,0,0.10789866248766582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,0,1,fp8,fp8,0,0.10030399759610494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,float16,0,0.07077866792678833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,float16,0,0.1071626643339793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,fp8,0,0.07234133283297221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,128,1,fp8,fp8,0,0.0662720004717509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,fp8,0,0.1076853374640147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,0,1,fp8,fp8,0,0.09949866930643718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,float16,0,0.07064533233642578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,float16,0,0.10752532879511516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,fp8,0,0.07231999933719635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,128,1,fp8,fp8,0,0.06841066479682922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,fp8,0,0.10925333698590596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,0,1,fp8,fp8,0,0.10146133104960124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,float16,0,0.07262933254241943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,float16,0,0.10938133796056111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,fp8,0,0.07470933099587758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,128,1,fp8,fp8,0,0.07028266787528992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,fp8,0,0.11150399843851726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,0,1,fp8,fp8,0,0.10531733433405559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,float16,0,0.04819199939568838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,float16,0,0.07457066575686137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,fp8,0,0.048101335763931274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,128,1,fp8,fp8,0,0.047797332207361855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,fp8,0,0.07472000022729237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,float16,0,0.18112534284591675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,float16,0,0.04596266647179922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,0,1,fp8,fp8,0,0.18260266383488974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,128,1,fp8,fp8,0,0.04372799893220266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,fp8,0,0.07259733478228252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,0,1,fp8,fp8,0,0.06838933130105336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,float16,0,0.046037331223487854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,float16,0,0.0729013333717982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,fp8,0,0.04588800172011057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,128,1,fp8,fp8,0,0.043706665436426796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,fp8,0,0.0726506660381953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,0,1,fp8,fp8,0,0.0683786670366923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,0,1,fp8,fp8,0,0.07107733190059662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,float16,0,0.07246933380762736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,fp8,0,0.04618666569391886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,128,1,fp8,fp8,0,0.043882668018341064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,fp8,0,0.07253866891066234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,0,1,fp8,fp8,0,0.06895466645558675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,float16,0,0.04805333415667216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,float16,0,0.0735093355178833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,fp8,0,0.04797333478927612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,float16,0,0.0459199994802475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,128,1,fp8,fp8,0,0.04530133306980133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,float16,0,0.07283199826876323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,0,1,fp8,fp8,0,0.06870399912198384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,float16,0,0.03736533224582672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,float16,0,0.05624000231424967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,128,1,fp8,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,fp8,0,0.05433600147565206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,0,1,fp8,fp8,0,0.05205333232879639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,float16,0,0.03750933210055033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,float16,0,0.05429333448410034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,fp8,0,0.03902400036652883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,fp8,0,0.05445333321889242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,0,1,fp8,fp8,0,0.05180266499519348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,float16,0,0.03749866783618927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,float16,0,0.054757331808408104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,fp8,0,0.039103999733924866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,128,1,fp8,fp8,0,0.036090667049090065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,fp8,0,0.056090667843818665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,fp8,0,0.0755680004755656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,float16,0,0.03762666632731756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,float16,0,0.05410666763782501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,fp8,0,0.03782933453718821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,128,1,fp8,fp8,0,0.03551999976237615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,fp8,0,0.05433600147565206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,0,1,fp8,fp8,0,0.05176533261934916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,float16,0,0.03745600084463755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,float16,0,0.05428266525268555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,fp8,0,0.037471999724706016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,128,1,fp8,fp8,0,0.03738666574160258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,fp8,0,0.05406933526198069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,0,1,fp8,fp8,0,0.052058666944503784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,float16,0,0.6536480188369751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,0,1,fp8,fp8,0,0.05374933282534281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,float16,0,0.8683253129323324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,fp8,0,0.6551626523335775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,128,1,fp8,fp8,0,0.5959359804789225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,fp8,0,0.8719733556111654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,0,1,fp8,fp8,0,0.7877813180287679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,float16,0,0.6787146727244059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,float16,0,0.890613317489624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,fp8,0,0.6761546929677328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,fp8,0,0.8902773062388102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,0,1,fp8,fp8,0,0.799178679784139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,float16,0,0.6919679641723633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,128,1,fp8,fp8,0,0.03745600084463755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,fp8,0,0.6893226305643717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,128,1,fp8,fp8,0,0.6210399866104126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,fp8,0,0.9070506890614828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,0,1,fp8,fp8,0,0.8149440288543701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,float16,0,0.7147626876831055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,float16,0,0.9089866479237875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,fp8,0,0.7052480379740397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,128,1,fp8,fp8,0,0.6064586639404297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,fp8,0,0.9213333129882812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,0,1,fp8,fp8,0,0.8378293514251709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,float16,0,0.38197867075602215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,float16,0,0.4944213231404622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,fp8,0,0.38357333342234295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,128,1,fp8,fp8,0,0.3591253360112508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,fp8,0,0.4976319869359334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,128,1,fp8,fp8,0,0.6422293186187744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,float16,0,0.32846933603286743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,float16,0,0.4403680165608724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,fp8,0,0.3303413391113281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,128,1,fp8,fp8,0,0.30407466491063434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,fp8,0,0.4413493474324544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,0,1,fp8,fp8,0,0.40381864706675213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,float16,0,0.9317386945088705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,float16,0,0.33344535032908124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,float16,0,0.4450826644897461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,128,1,fp8,fp8,0,0.31058667103449505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,0,1,fp8,fp8,0,0.4598506689071655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,fp8,0,0.44737064838409424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,0,1,fp8,fp8,0,0.4102240006128947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,float16,0,0.34251733620961505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,float16,0,0.45397333304087323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,128,1,fp8,fp8,0,0.3176213304201762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,fp8,0,0.45607999960581463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,0,1,fp8,fp8,0,0.41786666711171466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,float16,0,0.35233068466186523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,float16,0,0.4646506706873576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,fp8,0,0.35522667566935223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,128,1,fp8,fp8,0,0.3288319905598958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,fp8,0,0.4679306745529175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,0,1,fp8,fp8,0,0.4289919932683309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,fp8,0,0.3445119857788086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,float16,0,0.25893332560857135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,fp8,0,0.20045334100723267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,128,1,fp8,fp8,0,0.18980799118677774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,fp8,0,0.3351893424987793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,fp8,0,0.26216532786687213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,0,1,fp8,fp8,0,0.2432373364766439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,float16,0,0.16503467162450156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,float16,0,0.22589866320292154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,fp8,0,0.16729066769282022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,float16,0,0.19794134298960367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,fp8,0,0.2285226583480835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,0,1,fp8,fp8,0,0.21388800938924155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,float16,0,0.16914133230845133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,float16,0,0.22749332586924234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,fp8,0,0.17102932929992676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,128,1,fp8,fp8,0,0.16292267044385275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,fp8,0,0.23107200860977173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,0,1,fp8,fp8,0,0.21697600682576498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,float16,0,0.17531200249989828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,float16,0,0.2358400026957194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,fp8,0,0.17677332957585654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,128,1,fp8,fp8,0,0.16863999764124551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,128,1,fp8,fp8,0,0.16082132856051126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,float16,0,0.18388267358144125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,float16,0,0.24467732508977255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,fp8,0,0.18531199296315512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,128,1,fp8,fp8,0,0.17520000537236533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,fp8,0,0.2453226645787557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,0,1,fp8,fp8,0,0.22902933756510416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,float16,0,0.10618666807810466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,float16,0,0.139573335647583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,fp8,0,0.10891733566919963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,128,1,fp8,fp8,0,0.10603200395901997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,fp8,0,0.14220266540845236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,0,1,fp8,fp8,0,0.13684800267219543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,float16,0,0.09123733639717102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,float16,0,0.12526933352152506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,fp8,0,0.09311999877293904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,0,1,fp8,fp8,0,0.22463999191919962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,fp8,0,0.12596799929936728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,0,1,fp8,fp8,0,0.11719999710718791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,float16,0,0.09128000338872273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,float16,0,0.12566399574279785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,128,1,fp8,fp8,0,0.08558932940165202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,fp8,0,0.12611732880274454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,0,1,fp8,fp8,0,0.11718933780988057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,float16,0,0.09499200185139973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,fp8,0,0.23668799797693887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,float16,0,0.12798933188120523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,128,1,fp8,fp8,0,0.0849173367023468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,128,1,fp8,fp8,0,0.08967467149098714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,fp8,0,0.1279093325138092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,0,1,fp8,fp8,0,0.12037332852681477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,fp8,0,0.09324799974759419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,float16,0,0.1297653317451477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,fp8,0,0.09969066580136617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,128,1,fp8,fp8,0,0.09719467163085938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,fp8,0,0.13205333550771078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,0,1,fp8,fp8,0,0.12774933377901712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,float16,0,0.059877331058184304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,float16,0,0.08264533181985219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,fp8,0,0.062133332093556724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,128,1,fp8,fp8,0,0.06029333174228668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,fp8,0,0.08303466439247131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,0,1,fp8,fp8,0,0.07878399888674419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,float16,0,0.056703999638557434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,float16,0,0.07851733267307281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,fp8,0,0.0565280020236969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,128,1,fp8,fp8,0,0.052255998055140175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,fp8,0,0.07867733140786488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,0,1,fp8,fp8,0,0.0734986662864685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,float16,0,0.056426664193471275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,float16,0,0.07889066636562347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,fp8,0,0.056186666091283165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,128,1,fp8,fp8,0,0.05420800050099691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,fp8,0,0.07828266421953838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,0,1,fp8,fp8,0,0.07369066774845123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,float16,0,0.05665599803129832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,float16,0,0.07894933223724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,fp8,0,0.05816533168156942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,128,1,fp8,fp8,0,0.05401599903901418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,fp8,0,0.07974400122960408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,0,1,fp8,fp8,0,0.07414400080839793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,float16,0,0.05826666454474131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,float16,0,0.07923200229803722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,fp8,0,0.058559998869895935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,128,1,fp8,fp8,0,0.05807999769846598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,fp8,0,0.08078399797280629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,0,1,fp8,fp8,0,0.07669866581757863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,float16,0,0.04371733466784159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,float16,0,0.055120001236597695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,fp8,0,0.04374399781227112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,128,1,fp8,fp8,0,0.04141333450873693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,fp8,0,0.056074668963750206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,0,1,fp8,fp8,0,0.05376000205675761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,float16,0,0.041493333876132965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,float16,0,0.05429333448410034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,fp8,0,0.04087999959786733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,128,1,fp8,fp8,0,0.03980266551176707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,fp8,0,0.05379733443260193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,0,1,fp8,fp8,0,0.04971200227737427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,float16,0,0.09716266393661499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,float16,0,0.05383466680844625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,fp8,0,0.04167466859022776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,128,1,fp8,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,fp8,0,0.05418666700522105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,0,1,fp8,fp8,0,0.05012799799442291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,float16,0,0.039850667119026184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,fp8,0,0.09588799873987834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,float16,0,0.05422399938106537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,128,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,float16,0,0.040074666341145836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,0,1,fp8,fp8,0,0.051962668697039284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,float16,0,0.04207466542720795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,float16,0,0.05417066812515259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,fp8,0,0.04211199780305227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,128,1,fp8,fp8,0,0.03948266555865606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,fp8,0,0.05585066477457682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,0,1,fp8,fp8,0,0.05082133412361145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,float16,0,0.03335466732581457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,float16,0,0.04571199913819631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,128,1,fp8,fp8,0,0.03232000023126602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,fp8,0,0.04423466821511587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,0,1,fp8,fp8,0,0.04162666698296865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,float16,0,0.04595733185609182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,fp8,0,0.03331733246644338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,128,1,fp8,fp8,0,0.03183999905983607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,fp8,0,0.04394133388996124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,0,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,float16,0,0.03305600086847941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,float16,0,0.04437866806983948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,128,1,fp8,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,fp8,0,0.04552533229192098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,0,1,fp8,fp8,0,0.04177066683769226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,float16,0,0.03320533285538355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,float16,0,0.04557333389918009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,fp8,0,0.0332640012105306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,128,1,fp8,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,fp8,0,0.04558933277924856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,0,1,fp8,fp8,0,0.04213866591453552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,float16,0,0.033402666449546814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,float16,0,0.04400533437728882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,128,1,fp8,fp8,0,0.032298666735490165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,fp8,0,0.045893331368764244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,0,1,fp8,fp8,0,0.04346133271853129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,float16,0,0.7584160168965658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,float16,0,0.8998613357543945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,fp8,0,0.7523413499196371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,float16,0,0.03431999931732813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,fp8,0,0.05409599840641022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,fp8,0,0.891808032989502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,0,1,fp8,fp8,0,0.8183519840240479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,float16,0,0.768175999323527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,float16,0,0.9047040144602457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,fp8,0,0.7630026340484619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,128,1,fp8,fp8,0,0.6959146658579508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,128,1,fp8,fp8,0,0.6954773267110189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,fp8,0,0.9015253384908041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,0,1,fp8,fp8,0,0.8208373387654623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,float16,0,0.7822399934132894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,float16,0,0.9210560321807861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,fp8,0,0.7757066885630289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,128,1,fp8,fp8,0,0.8092160224914551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,fp8,0,0.9207519690195719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,0,1,fp8,fp8,0,0.9298826853434244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,float16,0,0.9047786394755045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,fp8,0,0.7544480164845785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,128,1,fp8,fp8,0,0.7792159716288248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,fp8,0,0.8982079823811849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,float16,0,0.40620267391204834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,0,1,fp8,fp8,0,0.905290683110555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,float16,0,0.4814560015996297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,128,1,fp8,fp8,0,0.39848534266153973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,float16,0,0.762885332107544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,fp8,0,0.47408533096313477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,0,1,fp8,fp8,0,0.46479467550913495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,float16,0,0.3884906768798828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,float16,0,0.46952001253763836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,fp8,0,0.3872906764348348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,128,1,fp8,fp8,0,0.3524640003840129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,fp8,0,0.4594186544418335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,float16,0,0.39239998658498126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,float16,0,0.46513064702351886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,fp8,0,0.3904800017674764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,128,1,fp8,fp8,0,0.36791467666625977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,fp8,0,0.46199464797973633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,0,1,fp8,fp8,0,0.42215998967488605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,float16,0,0.3988746802012126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,float16,0,0.4723466634750366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,0,1,fp8,fp8,0,0.4181813398996989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,fp8,0,0.39774401982625324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,128,1,fp8,fp8,0,0.39210132757822674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,fp8,0,0.3974613348642985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,0,1,fp8,fp8,0,0.4542986551920573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,float16,0,0.39104000727335614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,float16,0,0.46369067827860516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,fp8,0,0.3871786594390869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,128,1,fp8,fp8,0,0.3779733180999756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,fp8,0,0.4602880080540975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,0,1,fp8,fp8,0,0.444106658299764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,float16,0,0.25309866666793823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,fp8,0,0.47198931376139325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,fp8,0,0.2097866733868917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,128,1,fp8,fp8,0,0.2104746699333191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,fp8,0,0.24873065948486328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,0,1,fp8,fp8,0,0.24474666515986124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,float16,0,0.20382400353749594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,float16,0,0.2419360081354777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,fp8,0,0.20198400815327963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,128,1,fp8,fp8,0,0.18521066506703696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,fp8,0,0.239738663037618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,0,1,fp8,fp8,0,0.2181439995765686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,float16,0,0.20589333772659302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,float16,0,0.2431946595509847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,fp8,0,0.20574933290481567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,128,1,fp8,fp8,0,0.18787733713785806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,0,1,fp8,fp8,0,0.22235200802485147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,float16,0,0.2099306583404541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,float16,0,0.2481173276901245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,fp8,0,0.20990933974583945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,128,1,fp8,fp8,0,0.19939200083414713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,fp8,0,0.24698134263356528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,0,1,fp8,fp8,0,0.23271999756495157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,float16,0,0.20598934094111124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,float16,0,0.24502400557200113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,fp8,0,0.20586133003234863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,128,1,fp8,fp8,0,0.19763733943303427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,fp8,0,0.2428426742553711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,fp8,0,0.24444266160329184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,float16,0,0.11772800485293071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,float16,0,0.14105066657066345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,128,1,fp8,fp8,0,0.11622400085131328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,fp8,0,0.13891200224558511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,0,1,fp8,fp8,0,0.13657599687576294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,float16,0,0.10988799730936687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,float16,0,0.13247467080752054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,0,1,fp8,fp8,0,0.2320800026257833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,fp8,0,0.11005866527557373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,128,1,fp8,fp8,0,0.10135466853777568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,fp8,0,0.1313759982585907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,0,1,fp8,fp8,0,0.11992533008257548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,float16,0,0.21389333407084146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,float16,0,0.1322719951470693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,fp8,0,0.10986133416493733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,128,1,fp8,fp8,0,0.10274666547775269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,fp8,0,0.13246933619181314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,0,1,fp8,fp8,0,0.12157332897186279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,float16,0,0.11271466811498006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,float16,0,0.13591999808947244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,fp8,0,0.11228799819946289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,128,1,fp8,fp8,0,0.10970667004585266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,fp8,0,0.1344213287035624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,0,1,fp8,fp8,0,0.1280639966328939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,float16,0,0.11186133821805318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,float16,0,0.13613333304723105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,fp8,0,0.11148800452550252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,128,1,fp8,fp8,0,0.10948800047238667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,fp8,0,0.13427199920018515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,0,1,fp8,fp8,0,0.1281599998474121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,float16,0,0.06433600187301636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,float16,0,0.07734933495521545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,128,1,fp8,fp8,0,0.06796266635258992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,fp8,0,0.0769706666469574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,0,1,fp8,fp8,0,0.07657066484292348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,fp8,0,0.11617599924405415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,float16,0,0.07493866483370464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,float16,0,0.11013333002726237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,fp8,0,0.06229866544405619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,128,1,fp8,fp8,0,0.0601440022389094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,fp8,0,0.07459733386834462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,0,1,fp8,fp8,0,0.0707893321911494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,float16,0,0.062074666221936546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,fp8,0,0.0640533318122228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,fp8,0,0.06413866579532623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,128,1,fp8,fp8,0,0.06051200131575266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,fp8,0,0.0767680009206136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,0,1,fp8,fp8,0,0.07025599976380666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,float16,0,0.06461333235104878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,float16,0,0.07682666679223378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,fp8,0,0.06482133269309998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,128,1,fp8,fp8,0,0.06253333389759064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,fp8,0,0.07679466903209686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,0,1,fp8,fp8,0,0.07256000240643819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,float16,0,0.06242666641871134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,float16,0,0.07517333328723907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,fp8,0,0.06390933195749919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,128,1,fp8,fp8,0,0.06192533175150553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,fp8,0,0.0764213353395462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,0,1,fp8,fp8,0,0.07255466779073079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,float16,0,0.04029333343108495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,float16,0,0.05045866469542185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,fp8,0,0.04019733270009359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,128,1,fp8,fp8,0,0.04177600145339966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,fp8,0,0.050101334849993386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,0,1,fp8,fp8,0,0.05004799862702688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,float16,0,0.0395359992980957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,float16,0,0.06249066690603892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,float16,0,0.05070933202902476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,fp8,0,0.03988266736268997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,128,1,fp8,fp8,0,0.03946666667858759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,fp8,0,0.04994133114814758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,0,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,float16,0,0.04221866528193156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,float16,0,0.05128533144791921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,float16,0,0.076773335536321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,fp8,0,0.05089599887530009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,0,1,fp8,fp8,0,0.04756266872088114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,float16,0,0.041493333876132965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,float16,0,0.051413332422574363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,fp8,0,0.04142399877309799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,128,1,fp8,fp8,0,0.03985599925120672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,0,1,fp8,fp8,0,0.04984533290068308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,float16,0,0.041834667325019836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,float16,0,0.05180266499519348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,fp8,0,0.04211199780305227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,128,1,fp8,fp8,0,0.04011733333269755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,fp8,0,0.050069332122802734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,0,1,fp8,fp8,0,0.04939733445644379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,fp8,0,0.0498933345079422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,128,1,fp8,fp8,0,0.028016000986099243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,fp8,0,0.03358400116364161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,0,1,fp8,fp8,0,0.032245332996050514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,float16,0,0.03219733387231827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,128,1,fp8,fp8,0,0.026554666459560394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,fp8,0,0.031845333675543465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,0,1,fp8,fp8,0,0.03218133250872294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,float16,0,0.03364799916744232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,float16,0,0.02595199892918269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,float16,0,0.033674667278925575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,fp8,0,0.025973332424958546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,128,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,fp8,0,0.03385599950949351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,0,1,fp8,fp8,0,0.03181866556406021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,float16,0,0.02756800005833308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,float16,0,0.033759998778502144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,fp8,0,0.02757333219051361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,128,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,0,1,fp8,fp8,0,0.03347733368476232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,float16,0,0.02666666607062022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,float16,0,0.033717334270477295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,0,1,fp8,fp8,0,0.03200000027815501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,float16,0,0.023210667073726654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,float16,0,0.027456000447273254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,128,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,0,1,fp8,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,float16,0,0.029450667401154835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,128,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,0,1,fp8,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,float16,0,0.029450667401154835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,128,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,0,1,fp8,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,float16,0,0.029290666182835896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,128,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,128,1,fp8,fp8,0,0.03945599993069967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,0,1,fp8,fp8,0,0.029152000943819683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,float16,0,0.029301332930723827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,128,1,fp8,fp8,0,0.02292266736427943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,128,1,fp8,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,0,1,fp8,fp8,0,0.02810666710138321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,128,1,float16,float16,0,0.7393866380055746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,0,1,float16,float16,0,0.7473013401031494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,fp8,0,0.029109333952267964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,128,1,float16,fp8,0,0.7334720293680826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,0,1,fp8,fp8,0,0.6757973035176595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,128,1,float16,float16,0,0.7470666567484537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,0,1,float16,float16,0,0.7570400238037109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,128,1,float16,fp8,0,0.7411999702453613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,128,1,fp8,fp8,0,0.6764427026112875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,0,1,float16,fp8,0,0.7424906889597574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,0,1,float16,fp8,0,0.7531147003173828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,0,1,fp8,fp8,0,0.6843146483103434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,128,1,float16,float16,0,0.7621013323465983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,0,1,float16,float16,0,0.7755306561787924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,128,1,float16,fp8,0,0.761840025583903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,128,1,fp8,fp8,0,0.6707519690195719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,0,1,float16,fp8,0,0.7732373078664144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,0,1,fp8,fp8,0,0.7924213409423828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,128,1,float16,float16,0,0.7413547039031982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,0,1,float16,float16,0,0.7545279661814371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,128,1,float16,fp8,0,0.7328746318817139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,128,1,fp8,fp8,0,0.7847306728363037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,128,1,fp8,fp8,0,0.7601760228474935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,0,1,float16,fp8,0,0.7447786331176758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,0,1,fp8,fp8,0,0.7727466424306234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,128,1,float16,float16,0,0.392522652943929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,0,1,float16,float16,0,0.40114132563273114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,128,1,float16,fp8,0,0.38683732350667316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,0,1,float16,fp8,0,0.39370131492614746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,0,1,fp8,fp8,0,0.39207998911539715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,128,1,float16,float16,0,0.37814398606618244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,0,1,float16,float16,0,0.38388268152872723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,128,1,float16,fp8,0,0.3758080005645752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,128,1,fp8,fp8,0,0.3428959846496582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,0,1,float16,fp8,0,0.3819520076115926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,0,1,fp8,fp8,0,0.3458186785380046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,128,1,float16,float16,0,0.3830453157424927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,0,1,float16,float16,0,0.38788266976674396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,128,1,float16,fp8,0,0.38098132610321045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,128,1,fp8,fp8,0,0.3481333255767822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,0,1,float16,fp8,0,0.38610132535298664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,0,1,fp8,fp8,0,0.35178665320078534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,128,1,float16,float16,0,0.3901120026906331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,0,1,float16,float16,0,0.394538680712382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,128,1,float16,fp8,0,0.3879520098368327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,128,1,fp8,fp8,0,0.38146666685740155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,0,1,float16,fp8,0,0.3927520116170247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,0,1,fp8,fp8,0,0.38787734508514404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,128,1,float16,float16,0,0.379807988802592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,0,1,float16,float16,0,0.38674132029215497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,128,1,fp8,fp8,0,0.3879733482996623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,128,1,fp8,fp8,0,0.377344012260437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,0,1,float16,fp8,0,0.3829226493835449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,0,1,fp8,fp8,0,0.371509313583374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,0,1,float16,float16,0,0.21051732699076334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,128,1,float16,fp8,0,0.20386666059494019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,128,1,fp8,fp8,0,0.2037280003229777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,0,1,float16,fp8,0,0.20854934056599936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,0,1,fp8,fp8,0,0.20642133553822836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,128,1,float16,float16,0,0.1975946625073751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,0,1,float16,float16,0,0.2001333236694336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,128,1,float16,fp8,0,0.19726399580637613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,128,1,float16,fp8,0,0.3772159814834595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,0,1,float16,fp8,0,0.19935999313990274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,0,1,fp8,fp8,0,0.1819093426068624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,128,1,float16,float16,0,0.20796799659729004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,0,1,float16,float16,0,0.20362132787704468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,128,1,float16,fp8,0,0.19988799095153809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,128,1,fp8,fp8,0,0.18317866325378418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,0,1,float16,fp8,0,0.20190399885177612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,0,1,fp8,fp8,0,0.1856106718381246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,128,1,float16,float16,0,0.20368534326553345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,0,1,float16,float16,0,0.20803733666737875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,128,1,float16,fp8,0,0.20358399550120035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,128,1,fp8,fp8,0,0.18111467361450195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,0,1,float16,fp8,0,0.20568533738454184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,0,1,fp8,fp8,0,0.1951520045598348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,128,1,float16,float16,0,0.1996906598409017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,0,1,float16,float16,0,0.20268267393112183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,128,1,float16,fp8,0,0.19758933782577515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,128,1,fp8,fp8,0,0.19402666886647543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,0,1,float16,fp8,0,0.20113066832224527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,0,1,fp8,fp8,0,0.19708800315856934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,128,1,float16,float16,0,0.11514133214950562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,0,1,float16,float16,0,0.11710932850837708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,128,1,float16,fp8,0,0.11364799737930298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,128,1,fp8,fp8,0,0.11329600214958191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,128,1,fp8,fp8,0,0.19404800732930502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,0,1,fp8,fp8,0,0.11478400230407715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,128,1,float16,float16,0,0.10723732908566792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,0,1,float16,float16,0,0.109525332848231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,128,1,float16,float16,0,0.20059732596079508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,128,1,fp8,fp8,0,0.0990773340066274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,0,1,float16,fp8,0,0.10920000076293945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,0,1,fp8,fp8,0,0.10018133123715718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,128,1,float16,float16,0,0.10929066936175029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,0,1,float16,float16,0,0.11132267117500305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,128,1,float16,fp8,0,0.10940266648928325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,128,1,fp8,fp8,0,0.10155199964841206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,0,1,float16,fp8,0,0.10974400242169698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,0,1,fp8,fp8,0,0.1013706624507904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,128,1,float16,float16,0,0.11161067088445027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,0,1,float16,float16,0,0.1129919985930125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,128,1,float16,fp8,0,0.10997866590817769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,128,1,fp8,fp8,0,0.10622933506965637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,0,1,float16,fp8,0,0.11146133144696553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,0,1,fp8,fp8,0,0.10754133264223735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,128,1,float16,float16,0,0.1113813320795695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,0,1,float16,float16,0,0.11336533228556316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,128,1,float16,fp8,0,0.11131200194358826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,128,1,fp8,fp8,0,0.10750400026639302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,0,1,float16,fp8,0,0.11152533690134685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,0,1,fp8,fp8,0,0.10714667042096455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,128,1,float16,float16,0,0.0629066675901413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,0,1,float16,float16,0,0.06406400104363759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,128,1,float16,fp8,0,0.062394668658574425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,128,1,fp8,fp8,0,0.0655680000782013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,0,1,float16,fp8,0,0.06339733302593231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,0,1,float16,fp8,0,0.11556800206502278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,0,1,fp8,fp8,0,0.06425066788991292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,128,1,float16,float16,0,0.06035733222961426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,0,1,float16,float16,0,0.06211733321348826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,128,1,float16,fp8,0,0.06201600035031637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,128,1,fp8,fp8,0,0.05834133426348368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,0,1,float16,fp8,0,0.06239999830722809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,0,1,fp8,fp8,0,0.06090133388837179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,128,1,float16,float16,0,0.062128002444903054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,0,1,float16,float16,0,0.062261333068211876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,128,1,float16,fp8,0,0.062074666221936546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,128,1,fp8,fp8,0,0.05832533538341522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,0,1,float16,fp8,0,0.06221333146095276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,0,1,fp8,fp8,0,0.058464000622431435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,128,1,float16,float16,0,0.06205333272616068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,0,1,float16,float16,0,0.06433600187301636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,128,1,float16,fp8,0,0.062037333846092224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,128,1,fp8,fp8,0,0.06058133145173391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,0,1,float16,fp8,0,0.06266133487224579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,0,1,fp8,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,128,1,float16,float16,0,0.062319998939832054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,0,1,float16,float16,0,0.06262933214505513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,128,1,float16,fp8,0,0.10680533448855083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,128,1,fp8,fp8,0,0.06166933476924896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,0,1,float16,fp8,0,0.0621013343334198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,0,1,fp8,fp8,0,0.061621333161989846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,128,1,float16,float16,0,0.04177066683769226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,0,1,float16,float16,0,0.041797334949175514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,128,1,float16,fp8,0,0.04156800111134847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,128,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,0,1,float16,fp8,0,0.04221333563327789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,0,1,fp8,fp8,0,0.04162666698296865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,128,1,float16,float16,0,0.04008000095685323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,0,1,float16,float16,0,0.041493333876132965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,128,1,float16,fp8,0,0.04145599901676178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,128,1,fp8,fp8,0,0.040037333965301514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,0,1,fp8,fp8,0,0.039701332648595176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,128,1,float16,float16,0,0.04188266893227895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,0,1,float16,float16,0,0.04192000130812327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,128,1,float16,fp8,0,0.041936000188191734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,128,1,float16,fp8,0,0.0621919979651769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,0,1,float16,fp8,0,0.04242133100827535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,0,1,fp8,fp8,0,0.039690665900707245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,128,1,float16,float16,0,0.041077333192030586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,0,1,float16,float16,0,0.04185600082079569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,128,1,float16,fp8,0,0.04119466741879781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,128,1,fp8,fp8,0,0.039936001102129616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,0,1,float16,fp8,0,0.042912001411120095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,128,1,float16,float16,0,0.039994666973749794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,0,1,float16,float16,0,0.04211199780305227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,0,1,float16,fp8,0,0.04194133480389913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,128,1,float16,fp8,0,0.042277331153551735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,128,1,fp8,fp8,0,0.04048000027736028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,0,1,float16,fp8,0,0.042863999803860985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,128,1,fp8,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,0,1,float16,float16,0,0.027493332823117573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,128,1,float16,fp8,0,0.027082666754722595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,128,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,0,1,float16,fp8,0,0.027642667293548584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,0,1,fp8,fp8,0,0.040261333187421165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,0,1,fp8,fp8,0,0.028501334289709728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,128,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,0,1,float16,float16,0,0.02741866558790207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,128,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,128,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,0,1,float16,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,0,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,128,1,float16,float16,0,0.025199999411900837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,0,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,128,1,float16,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,128,1,fp8,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,0,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,0,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,128,1,float16,float16,0,0.027280000348885853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,0,1,float16,float16,0,0.028437333802382152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,128,1,float16,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,128,1,fp8,fp8,0,0.02741866558790207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,0,1,float16,fp8,0,0.02773866554101308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,128,1,float16,float16,0,0.0269813338915507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,0,1,float16,float16,0,0.027445333699385326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,128,1,fp8,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,0,1,float16,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,0,1,fp8,fp8,0,0.027610667049884796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,128,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,0,1,float16,float16,0,0.023706667125225067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,0,1,fp8,fp8,0,0.040752001106739044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,128,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,128,1,float16,float16,0,0.027679999669392902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,0,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,128,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,0,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,128,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,128,1,fp8,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,0,1,fp8,fp8,0,0.022490667800108593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,128,1,float16,float16,0,0.02327466756105423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,128,1,float16,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,128,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,0,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,0,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,128,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,128,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,128,1,float16,float16,0,0.022128000855445862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,128,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,128,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,0,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,128,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,0,1,float16,float16,0,0.021829334398110706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,128,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,128,1,fp8,fp8,0,0.0205226664741834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,0,1,float16,fp8,0,0.022522665560245514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,128,1,float16,float16,0,0.021930667261282604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,0,1,float16,float16,0,0.024133334557215374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,128,1,fp8,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,128,1,float16,fp8,0,0.021594665944576263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,0,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,0,1,float16,fp8,0,0.022074667116006214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,0,1,fp8,fp8,0,0.02048533285657565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,128,1,float16,float16,0,0.021536000072956085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,128,1,float16,fp8,0,0.022122666239738464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,128,1,fp8,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,0,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,0,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,0,1,float16,float16,0,0.021407999098300934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,128,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,128,1,fp8,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,0,1,float16,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,0,1,float16,float16,0,0.021690666675567627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,128,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,128,1,fp8,fp8,0,0.020586666961510975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,128,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,128,1,float16,float16,0,0.3475733200709025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,0,1,fp8,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,0,1,float16,float16,0,0.3393760124842326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,128,1,float16,fp8,0,0.343664010365804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,128,1,fp8,fp8,0,0.31480000416437787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,0,1,float16,fp8,0,0.33693333466847736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,0,1,fp8,fp8,0,0.3066506584485372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,128,1,float16,float16,0,0.3529226779937744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,0,1,float16,float16,0,0.34865065415700275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,128,1,float16,fp8,0,0.34914668401082355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,128,1,fp8,fp8,0,0.31862932443618774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,0,1,float16,fp8,0,0.3449653387069702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,0,1,fp8,fp8,0,0.31060266494750977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,128,1,float16,float16,0,0.36319466431935626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,0,1,float16,float16,0,0.3591200113296509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,128,1,float16,fp8,0,0.36057599385579425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,128,1,fp8,fp8,0,0.35354665915171307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,0,1,float16,fp8,0,0.35550932089487713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,128,1,float16,float16,0,0.3535626729329427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,0,1,float16,float16,0,0.34700266520182294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,128,1,float16,fp8,0,0.3500639994939168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,128,1,fp8,fp8,0,0.3474453290303548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,0,1,float16,fp8,0,0.34280534585316974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,0,1,fp8,fp8,0,0.34206398328145343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,128,1,float16,float16,0,0.19373865922292074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,0,1,float16,float16,0,0.19048533837000528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,128,1,float16,fp8,0,0.19075733423233032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,128,1,fp8,fp8,0,0.19142399231592813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,0,1,float16,fp8,0,0.18766399224599203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,0,1,fp8,fp8,0,0.1880693236986796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,128,1,float16,float16,0,0.1823306679725647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,0,1,float16,float16,0,0.1792746583620707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,128,1,float16,fp8,0,0.18157333135604858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,128,1,fp8,fp8,0,0.16633066534996033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,0,1,float16,fp8,0,0.1779093345006307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,0,1,fp8,fp8,0,0.1629759967327118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,128,1,float16,float16,0,0.18542933464050293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,0,1,float16,float16,0,0.18273067474365234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,128,1,float16,fp8,0,0.18582399686177573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,128,1,fp8,fp8,0,0.16909867525100708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,0,1,float16,fp8,0,0.18124266465504965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,0,1,fp8,fp8,0,0.16544000307718912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,128,1,float16,float16,0,0.19144533077875772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,0,1,float16,float16,0,0.1896479924519857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,128,1,float16,fp8,0,0.1905333399772644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,128,1,fp8,fp8,0,0.1792693336804708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,0,1,float16,fp8,0,0.1881706714630127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,0,1,fp8,fp8,0,0.17707200845082602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,128,1,float16,float16,0,0.18701332807540894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,0,1,float16,float16,0,0.18386133511861166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,128,1,float16,fp8,0,0.18716800212860107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,128,1,fp8,fp8,0,0.17998399337132773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,0,1,float16,fp8,0,0.1824480096499125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,0,1,fp8,fp8,0,0.17707733313242593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,0,1,float16,float16,0,0.10565867026646932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,128,1,float16,fp8,0,0.10622933506965637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,128,1,fp8,fp8,0,0.1055573324362437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,0,1,float16,fp8,0,0.10482666889826457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,0,1,fp8,fp8,0,0.34705599149068195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,0,1,fp8,fp8,0,0.10326400399208069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,128,1,float16,float16,0,0.09940266609191895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,0,1,float16,float16,0,0.0955466628074646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,128,1,float16,fp8,0,0.09729599952697754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,128,1,float16,float16,0,0.1067626674969991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,0,1,float16,fp8,0,0.09524800380071004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,0,1,fp8,fp8,0,0.08703999718030293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,128,1,float16,float16,0,0.0993226667245229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,0,1,float16,float16,0,0.0974026620388031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,128,1,float16,fp8,0,0.09923199812571208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,128,1,fp8,fp8,0,0.09326933821042378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,0,1,float16,fp8,0,0.09714133540789287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,0,1,fp8,fp8,0,0.09113599856694539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,128,1,float16,float16,0,0.10339200496673584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,0,1,float16,float16,0,0.10128532846768697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,128,1,float16,fp8,0,0.10225066542625427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,128,1,fp8,fp8,0,0.09879466891288757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,0,1,fp8,fp8,0,0.09650666515032451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,128,1,float16,float16,0,0.10331733028093974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,0,1,float16,float16,0,0.09924266735712688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,128,1,float16,fp8,0,0.10115733742713928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,128,1,fp8,fp8,0,0.09922132889429729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,0,1,float16,fp8,0,0.09911466638247173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,0,1,fp8,fp8,0,0.09657067060470581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,0,1,float16,float16,0,0.05831466615200043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,128,1,float16,fp8,0,0.060458665092786155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,128,1,fp8,fp8,0,0.062314664324124656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,0,1,float16,fp8,0,0.05880000193913778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,0,1,float16,fp8,0,0.10109866658846538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,128,1,fp8,fp8,0,0.090938667456309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,128,1,float16,float16,0,0.058703998724619545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,128,1,float16,fp8,0,0.058506667613983154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,128,1,fp8,fp8,0,0.05452266832192739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,0,1,float16,fp8,0,0.05870933334032694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,0,1,fp8,fp8,0,0.05347733199596405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,128,1,float16,float16,0,0.060032000144322716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,0,1,float16,float16,0,0.05866133173306783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,0,1,fp8,fp8,0,0.05972266693909963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,128,1,float16,fp8,0,0.05816533168156942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,128,1,fp8,fp8,0,0.053898667295773826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,0,1,float16,float16,0,0.056186666091283165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,0,1,fp8,fp8,0,0.05402133365472158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,128,1,float16,float16,0,0.06006933252016703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,0,1,float16,float16,0,0.05793066819508871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,128,1,float16,fp8,0,0.05989866455396017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,128,1,fp8,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,0,1,float16,fp8,0,0.057962665955225624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,0,1,fp8,fp8,0,0.05439466734727224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,128,1,float16,float16,0,0.058186665177345276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,0,1,float16,float16,0,0.056277334690093994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,128,1,float16,fp8,0,0.058261334896087646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,128,1,fp8,fp8,0,0.056314667065938316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,0,1,float16,fp8,0,0.05677866439024607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,0,1,fp8,fp8,0,0.05625600119431814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,0,1,float16,float16,0,0.037920000652472176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,0,1,float16,fp8,0,0.056794668237368263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,128,1,float16,fp8,0,0.03804266701141993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,128,1,fp8,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,0,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,0,1,fp8,fp8,0,0.037061333656311035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,0,1,float16,float16,0,0.03773866593837738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,128,1,float16,fp8,0,0.03755199909210205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,128,1,fp8,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,128,1,float16,float16,0,0.037685332198937736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,0,1,fp8,fp8,0,0.03537066777547201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,128,1,float16,float16,0,0.0378560001651446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,0,1,float16,float16,0,0.03605866680542628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,128,1,float16,fp8,0,0.03730133424202601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,128,1,float16,float16,0,0.035786665976047516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,128,1,fp8,fp8,0,0.035599999129772186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,0,1,fp8,fp8,0,0.035829332967599235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,128,1,float16,float16,0,0.03739733248949051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,0,1,float16,float16,0,0.03774933268626531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,128,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,128,1,fp8,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,0,1,float16,fp8,0,0.037733333806196846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,0,1,fp8,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,128,1,float16,float16,0,0.03743999948104223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,0,1,float16,float16,0,0.035775999228159584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,128,1,float16,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,128,1,fp8,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,0,1,fp8,fp8,0,0.03751999884843826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,128,1,float16,float16,0,0.025642665723959606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,0,1,float16,float16,0,0.025045332809289295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,128,1,float16,fp8,0,0.02492800106604894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,0,1,float16,fp8,0,0.03594133257865906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,0,1,float16,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,0,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,0,1,float16,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,0,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,128,1,float16,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,128,1,fp8,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,128,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,0,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,128,1,float16,float16,0,0.06003733476003011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,128,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,0,1,float16,float16,0,0.02351466566324234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,128,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,128,1,fp8,fp8,0,0.02382933348417282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,0,1,float16,fp8,0,0.024853333830833435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,128,1,float16,float16,0,0.025424001117547352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,128,1,float16,float16,0,0.025455998877684276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,128,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,128,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,128,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,0,1,float16,float16,0,0.02554133286078771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,128,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,0,1,float16,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,128,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,0,1,fp8,fp8,0,0.024442667762438457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,128,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,128,1,fp8,fp8,0,0.020746666938066483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,0,1,fp8,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,128,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,128,1,fp8,fp8,0,0.02605333427588145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,0,1,float16,float16,0,0.020981334149837494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,128,1,float16,fp8,0,0.021840001145998638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,0,1,float16,fp8,0,0.019952000429232914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,0,1,fp8,fp8,0,0.020282667130231857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,128,1,float16,float16,0,0.02165866643190384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,0,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,128,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,128,1,fp8,fp8,0,0.019685332973798115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,0,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,0,1,fp8,fp8,0,0.019685332973798115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,128,1,float16,float16,0,0.021573332448800404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,0,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,128,1,float16,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,128,1,fp8,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,128,1,float16,float16,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,128,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,128,1,fp8,fp8,0,0.02070933332045873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,0,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,128,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,0,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,128,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,128,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,128,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,128,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,128,1,float16,float16,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,128,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,128,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,0,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,0,1,fp8,fp8,0,0.01770666614174843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,128,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,128,1,float16,fp8,0,0.019760000209013622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,0,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,0,1,float16,float16,0,0.02093333254257838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,128,1,float16,float16,0,0.018432000031073887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,0,1,float16,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,0,1,fp8,fp8,0,0.017680000513792038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,128,1,float16,float16,0,0.017551999539136887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,0,1,float16,float16,0,0.01811733345190684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,0,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,128,1,float16,float16,0,0.019610666980346043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,0,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,128,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,128,1,fp8,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,128,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,128,1,float16,float16,0,0.018031999468803406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,128,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,128,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,128,1,float16,float16,0,0.19339199860890707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,0,1,float16,float16,0,0.19499733050664267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,128,1,float16,fp8,0,0.19179733594258627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,128,1,fp8,fp8,0,0.18249066670735678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,0,1,float16,fp8,0,0.19202667474746704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,0,1,fp8,fp8,0,0.1800640026728312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,128,1,float16,float16,0,0.19570134083429971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,0,1,float16,float16,0,0.19543999433517456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,128,1,float16,fp8,0,0.19346133867899576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,0,1,float16,fp8,0,0.19376534223556519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,128,1,fp8,fp8,0,0.17333332697550455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,0,1,fp8,fp8,0,0.1724053422609965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,128,1,float16,float16,0,0.19885333379109701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,128,1,float16,fp8,0,0.19490132729212442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,128,1,fp8,fp8,0,0.18640534083048502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,0,1,float16,fp8,0,0.19563732544581094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,128,1,float16,float16,0,0.19744000832239786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,0,1,float16,float16,0,0.19748800992965698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,128,1,float16,fp8,0,0.19819732507069907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,128,1,fp8,fp8,0,0.18629332383473715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,0,1,float16,fp8,0,0.19770665963490805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,0,1,float16,float16,0,0.198634664217631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,0,1,fp8,fp8,0,0.18638400236765543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,0,1,float16,float16,0,0.11199999849001567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,128,1,float16,fp8,0,0.10957866907119751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,128,1,fp8,fp8,0,0.10851732889811198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,0,1,fp8,fp8,0,0.1853813330332438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,0,1,fp8,fp8,0,0.10814399520556132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,0,1,float16,float16,0,0.10360532999038696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,128,1,float16,fp8,0,0.10244799653689067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,128,1,fp8,fp8,0,0.09566400448481242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,0,1,float16,fp8,0,0.10333866874376933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,0,1,fp8,fp8,0,0.09703466296195984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,0,1,float16,fp8,0,0.10963732997576396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,128,1,float16,float16,0,0.10537599523862202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,0,1,float16,float16,0,0.10403199990590413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,128,1,float16,fp8,0,0.10314133763313293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,128,1,fp8,fp8,0,0.09727467099825542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,0,1,float16,fp8,0,0.10337600111961365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,0,1,fp8,fp8,0,0.09587732950846355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,128,1,float16,float16,0,0.10666666428248088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,128,1,float16,float16,0,0.11111467083295186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,128,1,float16,fp8,0,0.10546666383743286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,128,1,fp8,fp8,0,0.10264533758163452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,0,1,float16,fp8,0,0.10588799913724263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,0,1,fp8,fp8,0,0.1032480001449585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,128,1,float16,float16,0,0.1090826690196991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,128,1,float16,float16,0,0.103493332862854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,128,1,float16,fp8,0,0.10773332913716634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,128,1,fp8,fp8,0,0.1037066678206126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,0,1,float16,fp8,0,0.10755733648935954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,0,1,fp8,fp8,0,0.10450667142868042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,128,1,float16,float16,0,0.059802666306495667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,0,1,float16,float16,0,0.06076799829800924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,128,1,float16,fp8,0,0.05973866581916809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,128,1,fp8,fp8,0,0.06173333525657654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,0,1,float16,fp8,0,0.059279998143514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,0,1,fp8,fp8,0,0.06098133325576782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,128,1,float16,float16,0,0.058634668588638306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,0,1,float16,float16,0,0.057850668827692665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,128,1,float16,fp8,0,0.05801600217819214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,128,1,fp8,fp8,0,0.05440000196297964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,0,1,float16,fp8,0,0.05698133508364359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,0,1,fp8,fp8,0,0.05403199791908264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,128,1,float16,float16,0,0.057946667075157166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,0,1,float16,float16,0,0.05807999769846598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,128,1,float16,fp8,0,0.058090666929880776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,128,1,fp8,fp8,0,0.05489066739877065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,0,1,float16,fp8,0,0.05789333085219065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,0,1,fp8,fp8,0,0.05513600011666616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,128,1,float16,float16,0,0.06043733159701029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,0,1,float16,float16,0,0.059450666109720864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,128,1,float16,fp8,0,0.059621334075927734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,128,1,fp8,fp8,0,0.05692266424496969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,0,1,float16,fp8,0,0.05996799965699514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,0,1,fp8,fp8,0,0.058143998185793556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,128,1,float16,float16,0,0.05844266712665558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,0,1,float16,float16,0,0.06027733286221822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,128,1,float16,fp8,0,0.05787733197212219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,128,1,fp8,fp8,0,0.057850668827692665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,0,1,float16,fp8,0,0.05789333085219065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,0,1,fp8,fp8,0,0.058245331048965454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,128,1,float16,float16,0,0.039642666776975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,0,1,float16,float16,0,0.03845866769552231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,128,1,float16,fp8,0,0.03990933299064636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,128,1,fp8,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,0,1,float16,fp8,0,0.03825599948565165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,0,1,fp8,fp8,0,0.03794133414824804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,128,1,float16,float16,0,0.03775999943415324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,0,1,float16,float16,0,0.03794133414824804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,128,1,float16,fp8,0,0.039621333281199135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,128,1,fp8,fp8,0,0.036389333506425224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,0,1,float16,fp8,0,0.03885333240032196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,0,1,fp8,fp8,0,0.037392000357309975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,0,1,float16,float16,0,0.10551466544469197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,128,1,float16,float16,0,0.03745600084463755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,0,1,float16,float16,0,0.037658666570981346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,128,1,float16,fp8,0,0.03800000001986822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,128,1,fp8,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,0,1,float16,fp8,0,0.03804266701141993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,0,1,fp8,fp8,0,0.03709333389997482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,128,1,float16,float16,0,0.03961066653331121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,0,1,float16,float16,0,0.039664000272750854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,128,1,float16,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,128,1,fp8,fp8,0,0.037530665596326195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,0,1,float16,fp8,0,0.03990933299064636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,0,1,fp8,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,128,1,float16,float16,0,0.03823466598987579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,0,1,float16,float16,0,0.03773866593837738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,128,1,float16,fp8,0,0.03782933453718821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,128,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,0,1,float16,fp8,0,0.04088533421357473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,0,1,fp8,fp8,0,0.038362666964530945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,128,1,float16,float16,0,0.02640533447265625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,0,1,float16,float16,0,0.026954665780067444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,128,1,fp8,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,0,1,fp8,fp8,0,0.026895999908447266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,0,1,float16,float16,0,0.10963199536005656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,0,1,float16,float16,0,0.025008000433444977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,128,1,float16,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,0,1,float16,fp8,0,0.026176000634829204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,0,1,fp8,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,0,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,128,1,float16,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,128,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,128,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,0,1,float16,float16,0,0.025562666356563568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,128,1,float16,fp8,0,0.02683199942111969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,128,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,0,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,0,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,128,1,float16,float16,0,0.025413334369659424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,0,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,128,1,fp8,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,0,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,0,1,fp8,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,128,1,float16,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,128,1,fp8,fp8,0,0.019653332730134327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,128,1,fp8,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,128,1,float16,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,128,1,fp8,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,0,1,float16,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,0,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,128,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,0,1,float16,float16,0,0.017583999782800674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,128,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,128,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,0,1,float16,float16,0,0.018112000077962875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,128,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,128,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,0,1,float16,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,0,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,0,1,float16,float16,0,0.018911999960740406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,128,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,128,1,float16,fp8,0,0.02605333427588145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,128,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,128,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,0,1,float16,float16,0,0.017445333302021027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,128,1,float16,float16,0,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,128,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,0,1,float16,fp8,0,0.01961600035429001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,0,1,float16,fp8,0,0.018640000373125076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,128,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,128,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,128,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,0,1,float16,float16,0,0.016741332908471424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,0,1,float16,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,0,1,float16,float16,0,0.01659199967980385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,128,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,0,1,float16,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,128,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,128,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,128,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,128,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,128,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,128,1,fp8,fp8,0,0.016549333930015564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,0,1,fp8,fp8,0,0.016586666305859882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,128,1,float16,float16,0,0.1381706694761912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,0,1,float16,float16,0,0.14018133282661438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,128,1,float16,fp8,0,0.1381333371003469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,128,1,fp8,fp8,0,0.12583466370900473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,0,1,float16,fp8,0,0.13801067074139914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,0,1,fp8,fp8,0,0.12548266847928366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,128,1,float16,float16,0,0.1400213340918223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,0,1,float16,float16,0,0.14017599821090698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,128,1,fp8,fp8,0,0.017685333887736004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,128,1,float16,fp8,0,0.13808000087738037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,128,1,fp8,fp8,0,0.12727466225624084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,0,1,fp8,fp8,0,0.12595199545224509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,128,1,float16,float16,0,0.14261866609255472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,0,1,float16,float16,0,0.14350933829943338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,128,1,float16,fp8,0,0.14008532961209616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,128,1,fp8,fp8,0,0.13293332854906717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,0,1,float16,fp8,0,0.14247999588648477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,0,1,fp8,fp8,0,0.1316480040550232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,128,1,float16,float16,0,0.14418133099873862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,0,1,float16,float16,0,0.14452266693115234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,128,1,float16,fp8,0,0.1421226660410563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,128,1,fp8,fp8,0,0.1342080036799113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,0,1,float16,fp8,0,0.142794668674469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,128,1,float16,float16,0,0.07866133252779643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,0,1,float16,float16,0,0.07856533428033192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,0,1,float16,fp8,0,0.1404266655445099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,128,1,float16,fp8,0,0.07673599819342296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,128,1,fp8,fp8,0,0.07706666489442189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,0,1,float16,fp8,0,0.07814933359622955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,0,1,fp8,fp8,0,0.07685333490371704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,0,1,float16,float16,0,0.0767626663049062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,128,1,float16,fp8,0,0.07656533519426982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,0,1,fp8,fp8,0,0.13248533010482788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,128,1,fp8,fp8,0,0.0705866664648056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,0,1,float16,fp8,0,0.0748533308506012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,0,1,fp8,fp8,0,0.07062399884064992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,128,1,float16,float16,0,0.0751146674156189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,0,1,float16,float16,0,0.07657066484292348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,128,1,float16,fp8,0,0.0767146646976471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,128,1,fp8,fp8,0,0.06855466465155284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,0,1,float16,fp8,0,0.07649066547552745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,0,1,fp8,fp8,0,0.06857599814732869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,128,1,float16,float16,0,0.07653866708278656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,0,1,float16,float16,0,0.07679466903209686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,128,1,float16,fp8,0,0.0765173335870107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,128,1,fp8,fp8,0,0.07236800094445546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,0,1,float16,fp8,0,0.07660266757011414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,128,1,float16,float16,0,0.07662400106589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,0,1,float16,float16,0,0.0763626645008723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,128,1,float16,fp8,0,0.07656000057856242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,128,1,fp8,fp8,0,0.07250133156776428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,0,1,float16,fp8,0,0.07674133280913036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,0,1,fp8,fp8,0,0.07241599758466084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,128,1,float16,float16,0,0.07673599819342296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,0,1,float16,float16,0,0.04771733283996582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,128,1,float16,fp8,0,0.04773333172003428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,128,1,fp8,fp8,0,0.04569066564242045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,0,1,float16,fp8,0,0.04796266555786133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,0,1,fp8,fp8,0,0.04618666569391886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,128,1,float16,float16,0,0.04577599962552389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,0,1,float16,float16,0,0.0458133320013682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,128,1,float16,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,128,1,fp8,fp8,0,0.04160533348719279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,0,1,float16,fp8,0,0.04680533210436503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,0,1,fp8,fp8,0,0.041536000867684685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,128,1,float16,float16,0,0.04765866696834564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,0,1,float16,float16,0,0.04589866598447164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,128,1,float16,fp8,0,0.046096002062161766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,128,1,fp8,fp8,0,0.04376000165939331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,128,1,float16,float16,0,0.046037331223487854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,0,1,fp8,fp8,0,0.04186666508515676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,128,1,float16,float16,0,0.04570133487383524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,0,1,float16,float16,0,0.045498669147491455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,128,1,float16,fp8,0,0.04764799773693085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,128,1,fp8,fp8,0,0.043680002291997276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,0,1,float16,fp8,0,0.047770669062932335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,0,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,128,1,float16,float16,0,0.045824001232783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,0,1,float16,float16,0,0.04795200129350027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,128,1,float16,fp8,0,0.045925334095954895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,128,1,fp8,fp8,0,0.0458133320013682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,0,1,float16,fp8,0,0.04563733438650767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,0,1,fp8,fp8,0,0.045754666129748024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,0,1,float16,float16,0,0.03156800071398417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,0,1,float16,fp8,0,0.04586133360862732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,128,1,float16,fp8,0,0.03123733401298523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,128,1,fp8,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,0,1,float16,fp8,0,0.031157332162062328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,0,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,128,1,float16,float16,0,0.029535998900731403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,0,1,float16,float16,0,0.031386665999889374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,128,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,128,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,0,1,float16,fp8,0,0.03187733391920725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,0,1,fp8,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,128,1,float16,float16,0,0.03133866687615713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,0,1,float16,float16,0,0.029824001093705494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,128,1,float16,fp8,0,0.029520000020662945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,128,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,0,1,float16,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,0,1,fp8,fp8,0,0.0295413335164388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,128,1,float16,float16,0,0.031370667119820915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,0,1,float16,float16,0,0.03155199935038885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,128,1,float16,fp8,0,0.03129599988460541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,128,1,fp8,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,0,1,float16,fp8,0,0.03170666595300039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,0,1,fp8,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,128,1,float16,float16,0,0.031354665756225586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,0,1,float16,float16,0,0.031008000175158184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,128,1,float16,fp8,0,0.030645333230495453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,128,1,fp8,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,0,1,float16,fp8,0,0.03048533449570338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,0,1,fp8,fp8,0,0.07331199944019318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,128,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,128,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,128,1,float16,float16,0,0.030613332986831665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,128,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,128,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,0,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,0,1,fp8,fp8,0,0.022533332308133442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,128,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,0,1,float16,float16,0,0.021216000119845074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,0,1,fp8,fp8,0,0.031146667897701263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,128,1,float16,fp8,0,0.022304000953833263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,128,1,fp8,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,0,1,fp8,fp8,0,0.020773333807786305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,128,1,float16,float16,0,0.021770666042963665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,0,1,float16,float16,0,0.021488000949223835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,128,1,float16,fp8,0,0.021642667551835377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,128,1,fp8,fp8,0,0.02086399992307027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,128,1,float16,float16,0,0.02142400046189626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,0,1,float16,float16,0,0.02170666555563609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,128,1,float16,fp8,0,0.022330666581789654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,128,1,fp8,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,0,1,float16,fp8,0,0.02199466774861018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,0,1,fp8,fp8,0,0.02163733293612798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,128,1,float16,float16,0,0.01748266691962878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,128,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,0,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,128,1,float16,float16,0,0.016704000532627106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,128,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,128,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,128,1,float16,float16,0,0.01749333366751671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,128,1,float16,fp8,0,0.01844800015290578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,0,1,fp8,fp8,0,0.017711999515692394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,128,1,float16,fp8,0,0.017946666727463405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,128,1,float16,fp8,0,0.021509334444999695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,0,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,128,1,float16,float16,0,0.0174346665541331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,0,1,float16,float16,0,0.017685333887736004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,128,1,fp8,fp8,0,0.017840000490347546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,0,1,float16,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,128,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,0,1,float16,float16,0,0.017632000148296356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,128,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,128,1,float16,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,0,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,128,1,float16,fp8,0,0.015834666788578033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,128,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,128,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,0,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,128,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,128,1,float16,float16,0,0.016751999656359356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,0,1,float16,fp8,0,0.01594666639963786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,128,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,0,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,0,1,float16,fp8,0,0.018351999421914417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,128,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,0,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,0,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,0,1,fp8,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,128,1,float16,float16,0,0.1116373340288798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,0,1,float16,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,0,1,float16,float16,0,0.11174399654070537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,128,1,fp8,fp8,0,0.1016533374786377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,0,1,float16,fp8,0,0.11205333471298218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,0,1,fp8,fp8,0,0.1016373336315155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,128,1,float16,float16,0,0.11395200093587239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,0,1,float16,float16,0,0.11166399717330933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,128,1,float16,fp8,0,0.11171199878056844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,128,1,fp8,fp8,0,0.10180800159772237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,0,1,float16,fp8,0,0.11169067025184631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,0,1,fp8,fp8,0,0.10122666756312053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,128,1,float16,float16,0,0.11171199878056844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,0,1,float16,float16,0,0.11248000462849934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,128,1,float16,fp8,0,0.11153067151705424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,128,1,fp8,fp8,0,0.10528000195821126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,128,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,0,1,float16,fp8,0,0.11322666207949321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,0,1,fp8,fp8,0,0.10514133175214131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,128,1,float16,float16,0,0.11133333047231038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,0,1,float16,float16,0,0.11236266295115153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,128,1,float16,fp8,0,0.11192533373832703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,128,1,fp8,fp8,0,0.10355200370152791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,0,1,float16,fp8,0,0.11193600296974182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,0,1,fp8,fp8,0,0.1035146713256836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,0,1,float16,float16,0,0.06435733536879222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,128,1,float16,fp8,0,0.06447466711203258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,128,1,float16,fp8,0,0.10934399565060933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,128,1,fp8,fp8,0,0.0620959997177124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,0,1,fp8,fp8,0,0.062080000837643944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,128,1,float16,float16,0,0.064560001095136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,0,1,float16,float16,0,0.06447466711203258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,128,1,float16,fp8,0,0.06453866759936015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,128,1,fp8,fp8,0,0.058415999015172325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,0,1,float16,fp8,0,0.06447466711203258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,0,1,fp8,fp8,0,0.05827199916044871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,128,1,float16,float16,0,0.06630399823188782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,0,1,float16,float16,0,0.06438933312892914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,128,1,float16,float16,0,0.06451733410358429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,128,1,fp8,fp8,0,0.06010666489601135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,0,1,float16,fp8,0,0.06518400212128957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,0,1,fp8,fp8,0,0.06032533446947733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,128,1,float16,float16,0,0.06613333523273468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,0,1,float16,float16,0,0.06464000046253204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,128,1,float16,fp8,0,0.06514133512973785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,128,1,fp8,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,0,1,float16,fp8,0,0.066170667608579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,0,1,fp8,fp8,0,0.06042666733264923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,128,1,float16,float16,0,0.06460266808668773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,0,1,float16,float16,0,0.06515199939409892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,128,1,float16,fp8,0,0.06425599753856659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,128,1,fp8,fp8,0,0.0621013343334198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,0,1,fp8,fp8,0,0.06038933495680491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,0,1,float16,fp8,0,0.06438399851322174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,0,1,float16,float16,0,0.03978666663169861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,128,1,float16,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,128,1,fp8,fp8,0,0.03737066686153412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,0,1,float16,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,0,1,fp8,fp8,0,0.0373333344856898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,128,1,float16,fp8,0,0.06507733464241028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,128,1,float16,float16,0,0.03763733307520548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,0,1,float16,float16,0,0.0374293327331543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,128,1,float16,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,128,1,fp8,fp8,0,0.03581333408753077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,0,1,float16,fp8,0,0.039333333571751915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,0,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,128,1,float16,float16,0,0.03947199881076813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,0,1,float16,float16,0,0.039850667119026184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,128,1,float16,fp8,0,0.039733332892258964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,128,1,fp8,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,0,1,float16,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,0,1,fp8,fp8,0,0.035743998984495796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,128,1,float16,float16,0,0.039546666045983635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,0,1,float16,float16,0,0.039701332648595176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,0,1,float16,fp8,0,0.06650133430957794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,128,1,fp8,fp8,0,0.03745066622893015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,0,1,float16,fp8,0,0.03955733279387156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,0,1,fp8,fp8,0,0.03745066622893015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,128,1,float16,float16,0,0.03957333415746689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,0,1,float16,float16,0,0.03749866783618927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,128,1,float16,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,128,1,fp8,fp8,0,0.036474667489528656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,0,1,float16,fp8,0,0.039850667119026184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,0,1,fp8,fp8,0,0.036943999429543815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,128,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,0,1,float16,float16,0,0.02759466568628947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,128,1,float16,fp8,0,0.0276853342851003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,128,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,0,1,float16,fp8,0,0.028991999725500744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,128,1,float16,float16,0,0.03750933210055033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,128,1,float16,float16,0,0.02565866708755493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,0,1,float16,float16,0,0.027263998985290527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,128,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,128,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,0,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,0,1,fp8,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,128,1,float16,float16,0,0.02740799884001414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,0,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,128,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,128,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,0,1,float16,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,0,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,128,1,float16,float16,0,0.026416001220544178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,0,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,128,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,0,1,float16,fp8,0,0.027701333165168762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,0,1,fp8,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,128,1,float16,float16,0,0.02738133321205775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,0,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,128,1,float16,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,0,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,0,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,128,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,128,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,0,1,float16,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,0,1,fp8,fp8,0,0.02004266654451688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,0,1,float16,float16,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,128,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,0,1,fp8,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,128,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,0,1,float16,float16,0,0.01951466624935468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,128,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,128,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,0,1,float16,float16,0,0.019727999965349834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,128,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,128,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,0,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,128,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,128,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,0,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,0,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,128,1,float16,fp8,0,0.016063999384641647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,128,1,float16,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,128,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,0,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,0,1,float16,float16,0,0.015637333194414776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,128,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,0,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,0,1,float16,float16,0,0.014767999450365702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,128,1,float16,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,128,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,0,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,0,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,128,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,128,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,128,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,128,1,float16,float16,0,0.016528000434239704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,0,1,float16,float16,0,0.014725333700577417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,128,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,128,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,128,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,128,1,float16,float16,0,0.10051733255386353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,0,1,float16,float16,0,0.10131200154622395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,128,1,float16,fp8,0,0.10053867101669312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,128,1,fp8,fp8,0,0.09095999598503113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,0,1,fp8,fp8,0,0.09206933776537578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,128,1,float16,float16,0,0.09929600358009338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,0,1,float16,float16,0,0.09913067022959392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,128,1,float16,fp8,0,0.09934932986895244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,128,1,fp8,fp8,0,0.09118400017420451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,0,1,float16,fp8,0,0.09988799691200256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,0,1,fp8,fp8,0,0.09109866619110107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,128,1,float16,float16,0,0.10130666693051656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,0,1,float16,fp8,0,0.0993226667245229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,128,1,float16,fp8,0,0.09956266482671101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,0,1,float16,fp8,0,0.09946133693059285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,0,1,fp8,fp8,0,0.09311999877293904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,128,1,float16,float16,0,0.10010666648546855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,0,1,float16,float16,0,0.09914666414260864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,128,1,float16,fp8,0,0.0990773340066274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,128,1,fp8,fp8,0,0.09212266405423482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,0,1,float16,fp8,0,0.10130133231480916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,0,1,fp8,fp8,0,0.09313600262006123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,128,1,float16,float16,0,0.05819199979305267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,0,1,float16,float16,0,0.05799466868241628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,0,1,float16,float16,0,0.09925867120424907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,128,1,fp8,fp8,0,0.05403199791908264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,0,1,float16,fp8,0,0.0563679983218511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,128,1,fp8,fp8,0,0.09388800462086995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,128,1,float16,float16,0,0.05598400036493937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,0,1,float16,float16,0,0.056517332792282104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,128,1,float16,fp8,0,0.05596800148487091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,128,1,fp8,fp8,0,0.0521066685517629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,0,1,float16,fp8,0,0.05624000231424967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,0,1,fp8,fp8,0,0.0539626677831014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,128,1,float16,float16,0,0.056314667065938316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,0,1,float16,float16,0,0.05817066629727682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,128,1,float16,fp8,0,0.05576533575852712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,128,1,fp8,fp8,0,0.052229334910710655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,0,1,float16,fp8,0,0.05585066477457682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,0,1,fp8,fp8,0,0.052383999029795326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,128,1,float16,float16,0,0.056048000852266945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,0,1,float16,float16,0,0.056287998954455055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,128,1,float16,fp8,0,0.05794133245944977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,128,1,fp8,fp8,0,0.05414933462937673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,0,1,float16,fp8,0,0.05801066756248474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,0,1,fp8,fp8,0,0.05439466734727224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,128,1,float16,float16,0,0.05762666463851929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,0,1,float16,float16,0,0.05791999896367391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,128,1,float16,fp8,0,0.05825600028038025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,128,1,fp8,fp8,0,0.054010664423306785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,0,1,float16,fp8,0,0.055957332253456116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,0,1,fp8,fp8,0,0.05376533170541128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,128,1,float16,float16,0,0.03349333256483078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,128,1,float16,fp8,0,0.05815466741720835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,128,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,128,1,fp8,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,0,1,float16,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,0,1,fp8,fp8,0,0.03365866591533025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,128,1,float16,float16,0,0.035717333356539406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,0,1,float16,float16,0,0.03405333310365677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,128,1,float16,fp8,0,0.035599999129772186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,128,1,fp8,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,0,1,float16,fp8,0,0.03349866718053818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,0,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,128,1,float16,float16,0,0.033743999898433685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,0,1,float16,float16,0,0.033344000577926636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,128,1,float16,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,128,1,fp8,fp8,0,0.03186133255561193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,0,1,float16,fp8,0,0.035360001027584076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,0,1,fp8,fp8,0,0.03182933231194814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,0,1,float16,float16,0,0.035760000348091125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,0,1,float16,float16,0,0.03365333378314972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,128,1,float16,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,128,1,fp8,fp8,0,0.03385599950949351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,0,1,float16,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,0,1,fp8,fp8,0,0.033557333052158356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,128,1,float16,float16,0,0.035674666364987694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,0,1,float16,float16,0,0.035360001027584076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,128,1,float16,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,128,1,fp8,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,0,1,float16,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,0,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,128,1,float16,float16,0,0.02548266698916753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,0,1,float16,float16,0,0.025578667720158894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,128,1,float16,fp8,0,0.027642667293548584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,128,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,0,1,float16,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,128,1,float16,float16,0,0.02513066679239273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,0,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,128,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,128,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,0,1,float16,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,0,1,fp8,fp8,0,0.023631999890009563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,128,1,float16,float16,0,0.025424001117547352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,0,1,float16,float16,0,0.02555199960867564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,128,1,float16,fp8,0,0.025573333104451496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,128,1,fp8,fp8,0,0.025013332565625507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,0,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,0,1,fp8,fp8,0,0.023941333095232647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,128,1,float16,float16,0,0.03527999917666117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,128,1,float16,float16,0,0.02552533398071925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,0,1,float16,float16,0,0.02552533398071925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,0,1,fp8,fp8,0,0.053914666175842285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,128,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,0,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,128,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,0,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,128,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,128,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,0,1,float16,fp8,0,0.02499199906984965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,0,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,128,1,float16,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,0,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,128,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,128,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,128,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,128,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,128,1,fp8,fp8,0,0.018565333137909572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,0,1,float16,fp8,0,0.01985599969824155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,128,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,128,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,128,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,128,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,128,1,float16,fp8,0,0.015578666081031164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,128,1,float16,float16,0,0.01870399961868922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,128,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,0,1,float16,fp8,0,0.01618133361140887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,128,1,float16,float16,0,0.016751999656359356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,128,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,128,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,128,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,0,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,128,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,128,1,float16,float16,0,0.0162773331006368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,0,1,float16,float16,0,0.016757333030303318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,128,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,128,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,0,1,float16,float16,0,0.016549333930015564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,128,1,fp8,fp8,0,0.01568000018596649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,128,1,fp8,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,0,1,fp8,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,128,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,128,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,128,1,fp8,fp8,0,0.01634666696190834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,0,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,0,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,128,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,0,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,128,1,fp8,fp8,0,0.016303999970356624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,128,1,float16,float16,0,0.016234666109085083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,0,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,128,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,0,1,float16,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,0,1,float16,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,0,1,float16,float16,0,0.015749332805474598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,128,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,128,1,float16,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,128,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,0,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,128,1,float16,float16,0,0.01462399959564209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,128,1,float16,fp8,0,0.01617066686352094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,0,1,float16,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,0,0.08450667063395183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,0,0.08655466636021932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,0,0.0851146678129832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,128,1,fp8,fp8,0,0.07903466622034709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,0,0.08688533306121826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,64,0,1,fp8,fp8,0,0.07851733267307281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,0,0.08689066767692566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,0,0.08513066172599792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,0,0.08703466256459554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,128,1,fp8,fp8,0,0.07869333525498708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,0,1,fp8,fp8,0,0.07860800127188365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,0,0.08664000034332275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,0,0.08703466256459554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,0,0.08678932984670003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,128,1,fp8,fp8,0,0.07881600161393483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,0,0.084906667470932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,64,0,1,fp8,fp8,0,0.07852800190448761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,0,0.08498133222262065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,0,0.08505599697430928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,0,0.08559999863306682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,0,0.0867786705493927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,0,0.08525866270065308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,0,1,fp8,fp8,0,0.07868800063927968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,128,1,float16,float16,0,0.05194666484991709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,0,0.050069332122802734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,128,1,float16,fp8,0,0.05177066723505656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,128,1,fp8,fp8,0,0.0459199994802475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,0,0.05199466645717621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,64,0,1,fp8,fp8,0,0.046122665206591286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,0,0.051818668842315674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,0,0.05000533163547516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,0,0.05051200091838837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,128,1,fp8,fp8,0,0.046821330984433494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,0,0.05192000170548757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,64,0,1,fp8,fp8,0,0.0479360024134318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,128,1,fp8,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,0,0.05002133548259735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,0,0.05046399931112925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,0,0.05012266834576925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,128,1,fp8,fp8,0,0.047824000318845115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,0,0.050160000721613564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,64,0,1,fp8,fp8,0,0.04588800172011057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,0,0.04993600149949392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,0,0.050053333242734276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,0,0.05212266743183136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,128,1,fp8,fp8,0,0.04794666667779287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,0,0.052069331208864846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,64,0,1,fp8,fp8,0,0.047653332352638245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,0,0.051962668697039284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,0,0.049914668003718056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,0,0.051914667089780174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,128,1,fp8,fp8,0,0.04615999758243561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,0,0.05005866785844167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,64,0,1,fp8,fp8,0,0.04629333317279816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,128,1,float16,float16,0,0.03158933420976003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,128,1,float16,fp8,0,0.03177600105603536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,128,1,fp8,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,0,1,fp8,fp8,0,0.029733332494894665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,0,0.03180799881617228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,0,0.03334933271010717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,0,0.033359999457995095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,128,1,fp8,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,0,0.03323200096686681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,64,0,1,fp8,fp8,0,0.029711998999118805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,0,0.031770666440327965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,128,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,64,0,1,fp8,fp8,0,0.03148266673088074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,0,0.03178133318821589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,0,0.03319466610749563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,0,0.033471999069054924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,0,0.033520000676314034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,0,0.033285332222779594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,0,1,fp8,fp8,0,0.031301334500312805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,0,0.03177600105603536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,0,0.031658666829268135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,128,1,fp8,fp8,0,0.02958400050799052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,0,1,fp8,fp8,0,0.029626667499542236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,64,128,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,64,128,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,64,128,1,fp8,fp8,0,0.07886399825414021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,64,128,1,fp8,fp8,0,0.031221332649389904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,0,0.0334346666932106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,128,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,0,0.02327466756105423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,64,128,1,fp8,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,128,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,0,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,0,0.02402666707833608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,0,0.02422933280467987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,128,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,0,0.023183998962243397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,0,1,fp8,fp8,0,0.023007998863856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,0,0.02346666653951009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,128,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,64,0,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,128,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,128,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,64,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,64,0,1,fp8,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,128,1,fp8,fp8,0,0.017674667139848072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,64,0,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,128,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,64,0,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,64,0,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,0,0.01562133307258288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,128,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,0,0.015557333827018738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,0,0.016714667280515034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,64,0,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,0,1,fp8,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,0,0.01749333366751671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,64,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,128,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,64,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,128,1,fp8,fp8,0,0.0163680004576842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,128,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,0,0.015754666179418564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,128,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,64,0,1,fp8,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,0,0.016373333831628162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,0,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,0,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,0,0.01598400001724561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,0,0.015978666643301647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,64,128,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,128,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,0,0.01578666642308235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,128,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,64,0,1,fp8,fp8,0,0.016330666840076447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,128,1,fp8,fp8,0,0.01581866666674614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,64,128,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,0,1,fp8,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,float16,0,0.6814560095469157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,0,0.015615999698638916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,fp8,0,0.6873706976572672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,128,1,fp8,fp8,0,0.6140693426132202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,float16,0,0.6954987049102783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,float16,0,4.219488143920898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,fp8,0,0.6997706890106201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,fp8,0,4.216586748758952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,128,1,fp8,fp8,0,0.6277066469192505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,float16,0,4.231861432393392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,float16,0,0.731386661529541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,0,1,fp8,fp8,0,3.820474624633789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,fp8,0,4.229733467102051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,fp8,0,0.7196106910705566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,128,1,fp8,fp8,0,0.6500800053278605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,float16,0,4.253184000651042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,0,1,fp8,fp8,0,3.8446667989095054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,float16,0,0.41785601774851483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,fp8,0,4.250927925109863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,fp8,0,0.4143679936726888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,128,1,fp8,fp8,0,0.3940693140029907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,float16,0,2.234015941619873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,float16,0,0.36507733662923175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,fp8,0,2.243237336476644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,0,1,fp8,fp8,0,2.0365440050760903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,fp8,0,0.37116265296936035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,128,1,fp8,fp8,0,0.3350293238957723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,0,1,fp8,fp8,0,3.863845189412435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,float16,0,2.1830080350240073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,float16,0,0.36953067779541016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,fp8,0,0.3725866476694743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,fp8,0,2.1882294019063315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,0,1,fp8,fp8,0,1.986789385477702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,128,1,fp8,fp8,0,0.33717866738637287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,float16,0,2.1854880650838218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,float16,0,0.37748265266418457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,fp8,0,0.380079984664917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,0,1,fp8,fp8,0,1.9916213353474934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,128,1,fp8,fp8,0,0.34491201241811115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,float16,0,2.191370646158854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,float16,0,0.23002133766810098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,fp8,0,2.2001867294311523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,0,1,fp8,fp8,0,2.0007893244425454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,float16,0,1.1942613124847412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,fp8,0,2.186768054962158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,128,1,fp8,fp8,0,0.21980800231297812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,float16,0,0.20800000429153442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,fp8,0,1.2028640111287434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,fp8,0,0.20779200394948324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,float16,0,1.1682133674621582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,128,1,fp8,fp8,0,0.19156799713770548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,fp8,0,0.24286399284998575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,float16,0,0.21177599827448526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,fp8,0,1.1673920154571533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,0,1,fp8,fp8,0,1.0680373509724934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,0,1,fp8,fp8,0,1.096453348795573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,128,1,fp8,fp8,0,0.19538666804631552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,float16,0,1.1716266473134358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,float16,0,0.21579732497533163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,fp8,0,1.1724159717559814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,fp8,0,0.2180746595064799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,float16,0,1.1755680243174236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,fp8,0,0.2118826707204183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,fp8,0,1.179754654566447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,0,1,fp8,fp8,0,1.0771679878234863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,0,1,fp8,fp8,0,1.0720053513844807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,float16,0,0.16684265931447348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,fp8,0,0.16670932372411093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,float16,0,0.7148959636688232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,128,1,fp8,fp8,0,0.15456533432006836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,fp8,0,0.71014936765035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,0,1,fp8,fp8,0,0.6401493151982626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,float16,0,0.16514133413632712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,fp8,0,0.16665066281954447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,128,1,fp8,fp8,0,0.15262400110562643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,128,1,fp8,fp8,0,0.20438933372497559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,fp8,0,0.6974720160166422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,0,1,fp8,fp8,0,0.6397440036137899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,float16,0,0.16472533345222473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,fp8,0,0.16784000396728516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,float16,0,0.6970773537953695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,128,1,fp8,fp8,0,0.15431466698646545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,fp8,0,0.6972106297810873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,float16,0,0.1653439998626709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,0,1,fp8,fp8,0,0.6396640141805013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,fp8,0,0.16453867157300314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,128,1,fp8,fp8,0,0.15435733397801718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,fp8,0,0.6977600256601969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,float16,0,0.6978826522827148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,float16,0,0.5204746723175049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,fp8,0,0.5182773272196451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,float16,0,0.6978507041931152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,128,1,fp8,fp8,0,0.46250665187835693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,0,1,fp8,fp8,0,0.6418986717859904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,float16,0,0.5210880041122437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,fp8,0,2.512282689412435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,0,1,fp8,fp8,0,2.269733270009359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,fp8,0,0.5259946584701538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,128,1,fp8,fp8,0,0.4893706639607747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,float16,0,2.508863925933838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,float16,0,0.5342986583709717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,0,1,fp8,fp8,0,2.2791733741760254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,float16,0,2.504543940226237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,fp8,0,0.5383040110270182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,128,1,fp8,fp8,0,0.48768532276153564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,float16,0,2.5220799446105957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,float16,0,0.31409066915512085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,fp8,0,2.5163092613220215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,fp8,0,2.5280319849650064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,fp8,0,0.31616532802581787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,float16,0,1.345578670501709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,fp8,0,1.353962739308675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,0,1,fp8,fp8,0,1.2311840057373047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,float16,0,0.2770880063374837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,fp8,0,0.27723199129104614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,0,1,fp8,fp8,0,2.293285369873047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,128,1,fp8,fp8,0,0.2983893354733785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,fp8,0,1.309653361638387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,0,1,fp8,fp8,0,1.1918666362762451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,float16,0,0.2795146703720093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,fp8,0,0.29180266459782916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,float16,0,1.3094879786173503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,128,1,fp8,fp8,0,0.25247466564178467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,fp8,0,1.3162933190663655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,0,1,fp8,fp8,0,1.1962026755015056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,float16,0,0.2876426577568054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,fp8,0,0.3078346649805705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,float16,0,1.3125759760538738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,128,1,fp8,fp8,0,0.2591200073560079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,fp8,0,1.3225546677907307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,float16,0,0.17564266920089722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,float16,0,1.3243892987569172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,128,1,fp8,fp8,0,0.26340266068776447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,fp8,0,0.17908267180124918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,128,1,fp8,fp8,0,0.16733866930007935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,fp8,0,0.7405440012613932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,0,1,fp8,fp8,0,0.6760640144348145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,float16,0,0.15642666816711426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,fp8,0,0.15660799543062845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,float16,0,0.7167466481526693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,float16,0,0.735152006149292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,fp8,0,0.7157440185546875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,0,1,fp8,fp8,0,0.6532373428344727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,float16,0,0.15846932927767435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,0,1,fp8,fp8,0,1.204634666442871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,fp8,0,0.16881599028905234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,float16,0,0.7170453071594238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,128,1,fp8,fp8,0,0.14841600259145102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,float16,0,0.16173866391181946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,0,1,fp8,fp8,0,0.6566880146662394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,128,1,fp8,fp8,0,0.1463573376337687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,fp8,0,0.16474133729934692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,float16,0,0.7216853300730387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,fp8,0,0.7226239840189616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,0,1,fp8,fp8,0,0.6625066598256429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,float16,0,0.1240000029404958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,float16,0,0.44846399625142414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,fp8,0,0.7178239822387695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,fp8,0,0.44972801208496094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,128,1,fp8,fp8,0,0.15332266688346863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,float16,0,0.12337600191434224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,float16,0,0.44549334049224854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,fp8,0,0.12378666798273723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,128,1,fp8,fp8,0,0.11924266815185547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,fp8,0,0.4450720151265462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,0,1,fp8,fp8,0,0.41259201367696124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,float16,0,0.12361066540082295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,fp8,0,0.12390399972597758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,128,1,fp8,fp8,0,0.11590933799743652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,float16,0,0.44553065299987793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,fp8,0,0.12402666608492534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,128,1,fp8,fp8,0,0.11574400464693706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,fp8,0,0.4458773136138916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,0,1,fp8,fp8,0,0.40881601969401044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,0,1,fp8,fp8,0,0.40932798385620117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,float16,0,0.12382400035858154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,fp8,0,0.12364799777666728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,float16,0,0.445093313852946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,fp8,0,0.4455999930699666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,0,1,fp8,fp8,0,0.40878931681315106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,float16,0,0.43038400014241535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,fp8,0,0.433354655901591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,float16,0,1.8067679405212402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,128,1,fp8,fp8,0,0.3869333267211914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,float16,0,0.43641066551208496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,fp8,0,1.8158666292826335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,0,1,fp8,fp8,0,1.6410986582438152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,fp8,0,0.4471466541290283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,128,1,fp8,fp8,0,0.39452266693115234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,float16,0,1.816736062367757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,float16,0,0.4474133253097534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,fp8,0,1.8246240615844727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,fp8,0,0.46421865622202557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,float16,0,1.826266606648763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,128,1,fp8,fp8,0,0.4061066706975301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,128,1,fp8,fp8,0,0.11620266238848369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,float16,0,0.2591093381245931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,0,1,fp8,fp8,0,1.6498346328735352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,0,1,fp8,fp8,0,1.6591733296712239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,fp8,0,0.2648533384005229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,128,1,fp8,fp8,0,0.24444266160329184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,fp8,0,0.9919520219167074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,0,1,fp8,fp8,0,0.9027360280354818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,float16,0,0.22936532894770303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,fp8,0,0.2330133318901062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,float16,0,0.9541707038879395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,fp8,0,1.8336426417032878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,128,1,fp8,fp8,0,0.21173866589864096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,float16,0,0.9838026364644369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,float16,0,0.23254400491714478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,fp8,0,0.9563360214233398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,0,1,fp8,fp8,0,0.8695893287658691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,fp8,0,0.23609066009521484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,float16,0,0.955573320388794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,float16,0,0.24677334229151407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,fp8,0,0.9573919773101807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,0,1,fp8,fp8,0,0.8734026749928793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,fp8,0,0.25066665808359784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,128,1,fp8,fp8,0,0.22210667530695596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,float16,0,0.9618879954020182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,float16,0,0.1595200002193451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,fp8,0,0.9680639902750651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,fp8,0,0.15262400110562643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,128,1,fp8,fp8,0,0.14443733294804892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,fp8,0,0.5486026604970297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,0,1,fp8,fp8,0,0.5027039845784506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,0,1,fp8,fp8,0,0.8798666795094808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,float16,0,0.13597333431243896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,128,1,fp8,fp8,0,0.2160373330116272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,fp8,0,0.1363146702448527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,float16,0,0.529141346613566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,128,1,fp8,fp8,0,0.12346667051315308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,fp8,0,0.5304853518803915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,0,1,fp8,fp8,0,0.48131199677785236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,fp8,0,0.1360373298327128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,float16,0,0.5388746658960978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,128,1,fp8,fp8,0,0.12550933162371317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,fp8,0,0.5317973295847574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,0,1,fp8,fp8,0,0.4841333230336507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,float16,0,0.5473119815190634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,fp8,0,0.14011733730634054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,float16,0,0.5315893491109213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,128,1,fp8,fp8,0,0.13176533579826355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,fp8,0,0.5341813166936239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,0,1,fp8,fp8,0,0.49217601617177326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,float16,0,0.10917333761850993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,fp8,0,0.10963199536005656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,128,1,fp8,fp8,0,0.10342933734258015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,float16,0,0.1376586655775706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,fp8,0,0.3399946689605713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,0,1,fp8,fp8,0,0.31377599636713666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,float16,0,0.10912000139554341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,float16,0,0.3389813502629598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,128,1,fp8,fp8,0,0.10337066650390625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,fp8,0,0.3391626675923665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,0,1,fp8,fp8,0,0.31297600269317627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,float16,0,0.10934933026631673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,float16,0,0.1360213359196981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,fp8,0,0.10941333572069804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,128,1,fp8,fp8,0,0.10293866197268169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,float16,0,0.34010132153828937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,0,1,fp8,fp8,0,0.3118293285369873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,float16,0,0.1097813347975413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,float16,0,0.33929598331451416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,fp8,0,0.10994133353233337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,128,1,fp8,fp8,0,0.10307733217875163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,fp8,0,0.33900801340738934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,float16,0,0.33960533142089844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,fp8,0,0.11105066537857056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,0,1,fp8,fp8,0,0.3123040000597636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,fp8,0,0.3386559883753459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,fp8,0,0.6705813407897949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,float16,0,2.3773013750712075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,128,1,fp8,fp8,0,0.5968639850616455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,float16,0,0.6665173371632894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,fp8,0,2.383135954538981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,fp8,0,0.6837920347849528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,float16,0,2.3965706825256348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,128,1,fp8,fp8,0,0.6265866756439209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,float16,0,0.6790826320648193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,float16,0,0.6970720291137695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,0,1,fp8,fp8,0,2.1668853759765625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,fp8,0,0.703285296758016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,0,1,fp8,fp8,0,2.151360034942627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,128,1,fp8,fp8,0,0.6342506806055704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,float16,0,2.4134666124979653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,fp8,0,2.403103987375895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,0,1,fp8,fp8,0,2.188357353210449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,fp8,0,0.39924800395965576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,128,1,fp8,fp8,0,0.3654719988505046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,float16,0,0.3907359838485718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,fp8,0,1.2851893107096355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,float16,0,1.2762293020884197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,float16,0,0.3477333386739095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,fp8,0,0.35154132048288983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,float16,0,1.2275306383768718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,128,1,fp8,fp8,0,0.3142720063527425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,fp8,0,1.2309599717458088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,0,1,fp8,fp8,0,1.1140906810760498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,float16,0,0.3512800137201945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,fp8,0,0.3555946747461955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,float16,0,1.2352373600006104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,128,1,fp8,fp8,0,0.32069865862528485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,fp8,0,2.4157387415568032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,fp8,0,1.2383413314819336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,0,1,fp8,fp8,0,1.1207573413848877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,float16,0,0.3599253495534261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,fp8,0,0.36540265878041583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,128,1,fp8,fp8,0,0.3286186655362447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,float16,0,1.2439359823862712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,float16,0,0.21344000101089478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,fp8,0,1.2455039819081624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,0,1,fp8,fp8,0,1.1305279731750488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,0,1,fp8,fp8,0,1.163424015045166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,128,1,fp8,fp8,0,0.20138667027155557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,fp8,0,0.6858399709065756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,float16,0,0.1873706579208374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,float16,0,0.6527573267618815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,fp8,0,0.18897600968678793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,128,1,fp8,fp8,0,0.18542933464050293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,0,1,fp8,fp8,0,0.6265600124994913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,fp8,0,0.654586672782898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,0,1,fp8,fp8,0,0.5990026791890463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,float16,0,0.6569013198216757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,fp8,0,0.2173866629600525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,fp8,0,0.1917919913927714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,128,1,fp8,fp8,0,0.17695466677347818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,fp8,0,0.6584959824879965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,0,1,fp8,fp8,0,0.6014560063680013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,float16,0,0.19596266746520996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,float16,0,0.18961066007614136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,fp8,0,0.198469340801239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,128,1,fp8,fp8,0,0.18319465716679892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,float16,0,0.6823679606119791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,fp8,0,0.666042685508728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,0,1,fp8,fp8,0,0.6062933206558228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,float16,0,0.1240000029404958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,fp8,0,0.1260693371295929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,128,1,fp8,fp8,0,0.12270933389663696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,fp8,0,0.3859626849492391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,0,1,fp8,fp8,0,0.3573066790898641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,float16,0,0.1114453375339508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,float16,0,0.6633173227310181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,fp8,0,0.11332266529401143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,128,1,fp8,fp8,0,0.10121599833170573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,fp8,0,0.37218133608500165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,0,1,fp8,fp8,0,0.33694934844970703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,float16,0,0.11381333072980244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,float16,0,0.38450666268666583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,float16,0,0.37094934781392414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,128,1,fp8,fp8,0,0.1032960017522176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,fp8,0,0.37296001116434735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,0,1,fp8,fp8,0,0.33879999319712323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,float16,0,0.11362666885058086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,float16,0,0.37309332688649494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,128,1,fp8,fp8,0,0.10513066252072652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,fp8,0,0.3742613395055135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,fp8,0,0.11346133550008138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,0,1,fp8,fp8,0,0.3447146813074748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,float16,0,0.24654932816823324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,float16,0,0.37176533540089923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,fp8,0,0.11563199758529663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,fp8,0,0.24666666984558105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,0,1,fp8,fp8,0,0.2262186606725057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,float16,0,0.08847467104593913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,float16,0,0.08905067046483357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,float16,0,0.2451840043067932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,fp8,0,0.08885332942008972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,128,1,fp8,fp8,0,0.08436266581217448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,128,1,fp8,fp8,0,0.08461333314577739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,fp8,0,0.24554665883382162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,0,1,fp8,fp8,0,0.2265546719233195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,float16,0,0.24644267559051514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,fp8,0,0.08946667114893596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,128,1,fp8,fp8,0,0.0835040012995402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,fp8,0,0.08859733740488689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,0,1,fp8,fp8,0,0.2263466715812683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,float16,0,0.09063999851544698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,float16,0,0.2469386657079061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,fp8,0,0.08925867080688477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,128,1,fp8,fp8,0,0.08469866712888081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,fp8,0,0.24701333045959473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,fp8,0,0.24618132909138998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,float16,0,0.503162662188212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,float16,0,0.08892800410588582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,fp8,0,0.5061813195546468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,128,1,fp8,fp8,0,0.449349323908488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,0,1,fp8,fp8,0,0.22819199164708456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,fp8,0,1.4553759892781575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,float16,0,0.5102666616439819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,fp8,0,0.5142346620559692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,128,1,fp8,fp8,0,0.4597173531850179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,0,1,fp8,fp8,0,1.3097279866536458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,fp8,0,1.4626453717549641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,0,1,fp8,fp8,0,1.3204320271809895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,float16,0,0.5229546626408895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,fp8,0,0.5266613165537516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,float16,0,1.4750879605611165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,128,1,fp8,fp8,0,0.47462932268778485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,float16,0,1.4525705973307292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,fp8,0,1.4833332697550456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,0,1,fp8,fp8,0,1.3354399998982747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,float16,0,0.7956213156382242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,128,1,fp8,fp8,0,0.278874675432841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,fp8,0,0.8016053040822347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,float16,0,1.4615893363952637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,float16,0,0.2644373377164205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,fp8,0,0.3046986659367879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,float16,0,0.7569386959075928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,fp8,0,0.26523733139038086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,128,1,fp8,fp8,0,0.24155199527740479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,fp8,0,0.7580373287200928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,0,1,fp8,fp8,0,0.686346689860026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,float16,0,0.26667733987172443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,float16,0,0.2993546724319458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,fp8,0,0.26951466004053753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,float16,0,0.7594666481018066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,fp8,0,0.7645866870880127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,float16,0,0.2762719988822937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,float16,0,0.7786719799041748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,0,1,fp8,fp8,0,0.726464033126831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,fp8,0,0.2781599958737691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,128,1,fp8,fp8,0,0.25225067138671875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,0,1,fp8,fp8,0,0.6930826505025228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,0,1,fp8,fp8,0,0.7010773022969564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,float16,0,0.1625440021355947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,float16,0,0.4325386683146159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,128,1,fp8,fp8,0,0.1544533371925354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,fp8,0,0.4353333314259847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,0,1,fp8,fp8,0,0.398362676302592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,fp8,0,0.7723626295725504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,float16,0,0.13955199718475342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,fp8,0,0.14223466316858926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,fp8,0,0.16666133205095926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,128,1,fp8,fp8,0,0.1300266683101654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,fp8,0,0.41043198108673096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,0,1,fp8,fp8,0,0.37374401092529297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,float16,0,0.14215999841690063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,float16,0,0.4103786547978719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,128,1,fp8,fp8,0,0.13387200236320496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,fp8,0,0.4121119976043701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,0,1,fp8,fp8,0,0.37751468022664386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,float16,0,0.1463573376337687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,128,1,fp8,fp8,0,0.24507200717926025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,float16,0,0.416592001914978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,fp8,0,0.14845866958300272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,128,1,fp8,fp8,0,0.14032000303268433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,fp8,0,0.417797327041626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,0,1,fp8,fp8,0,0.3835039933522542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,float16,0,0.09070400396982829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,float16,0,0.2474773327509562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,128,1,fp8,fp8,0,0.09214400251706441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,fp8,0,0.25082133213679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,0,1,fp8,fp8,0,0.23311465978622437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,float16,0,0.08492799599965413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,float16,0,0.24050666888554892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,float16,0,0.4074079990386963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,fp8,0,0.08460799853006999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,128,1,fp8,fp8,0,0.07860266665617625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,fp8,0,0.24064000447591147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,0,1,fp8,fp8,0,0.2198773423830668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,float16,0,0.08480532964070638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,float16,0,0.2407039999961853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,fp8,0,0.08611733714739482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,128,1,fp8,fp8,0,0.07900266846021016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,fp8,0,0.24274667104085287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,0,1,fp8,fp8,0,0.21940267086029053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,float16,0,0.08666666348775227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,float16,0,0.24292800823847452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,fp8,0,0.09353599945704143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,fp8,0,0.08679999907811482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,128,1,fp8,fp8,0,0.08066666622956593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,fp8,0,0.2426453431447347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,0,1,fp8,fp8,0,0.22167466084162393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,float16,0,0.06860800087451935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,fp8,0,0.149125337600708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,128,1,fp8,fp8,0,0.06648533542950948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,fp8,0,0.1704639991124471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,0,1,fp8,fp8,0,0.157151997089386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,float16,0,0.06860266625881195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,float16,0,0.16908266146977743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,fp8,0,0.06833600004514058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,float16,0,0.16912533839543661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,128,1,fp8,fp8,0,0.06428800026575725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,fp8,0,0.1695786714553833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,0,1,fp8,fp8,0,0.15657599767049155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,float16,0,0.06817066669464111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,fp8,0,0.06865600248177846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,128,1,fp8,fp8,0,0.06651733318964641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,fp8,0,0.16991466283798218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,0,1,fp8,fp8,0,0.1583039959271749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,float16,0,0.06846933563550313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,float16,0,0.1707893411318461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,128,1,fp8,fp8,0,0.06633600095907848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,fp8,0,0.06926399966080983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,fp8,0,0.16936000188191733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,0,1,fp8,fp8,0,0.156741331020991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,float16,0,0.6642666657765707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,fp8,0,0.6678720315297445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,fp8,0,0.06810133159160614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,128,1,fp8,fp8,0,0.5939840078353882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,fp8,0,1.461535930633545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,float16,0,0.6785493691762289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,float16,0,1.4729386965433757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,float16,0,0.16993600130081177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,0,1,fp8,fp8,0,1.3056106567382812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,fp8,0,0.6829066276550293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,128,1,fp8,fp8,0,0.6081386804580688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,fp8,0,1.4714560508728027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,float16,0,0.6985973517100016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,0,1,fp8,fp8,0,1.3237813313802083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,fp8,0,0.702064037322998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,128,1,fp8,fp8,0,0.6299733320871989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,float16,0,1.4944799741109211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,float16,0,0.3947360118230184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,fp8,0,1.4963520367940266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,float16,0,0.796725352605184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,fp8,0,0.3983146746953328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,128,1,fp8,fp8,0,0.3767999807993571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,float16,0,1.4570719401041667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,0,1,fp8,fp8,0,0.7293279965718588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,float16,0,0.3389600118001302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,fp8,0,0.34224534034729004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,float16,0,0.7495786348978678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,128,1,fp8,fp8,0,0.3081386685371399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,fp8,0,0.7515626748402914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,0,1,fp8,fp8,0,0.6770133177439371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,fp8,0,0.801637331644694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,fp8,0,0.347925345102946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,128,1,fp8,fp8,0,0.31329600016276044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,fp8,0,0.7567520141601562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,float16,0,0.3535413344701131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,float16,0,0.35338131586710614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,0,1,fp8,fp8,0,1.3457172711690266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,float16,0,0.7629173596700033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,fp8,0,0.3577813307444255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,128,1,fp8,fp8,0,0.33771200974782306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,fp8,0,0.7684746583302816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,0,1,fp8,fp8,0,0.6936000188191732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,float16,0,0.20558400948842367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,float16,0,0.7552053133646647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,float16,0,0.4238719940185547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,fp8,0,0.20987200736999512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,128,1,fp8,fp8,0,0.19365866978963217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,fp8,0,0.42819734414418537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,float16,0,0.4119360049565633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,fp8,0,0.17934399843215942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,0,1,fp8,fp8,0,0.6850506464640299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,128,1,fp8,fp8,0,0.16692266861597696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,fp8,0,0.39604798952738446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,0,1,fp8,fp8,0,0.3631093502044678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,float16,0,0.18081067005793253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,float16,0,0.39769065380096436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,0,1,fp8,fp8,0,0.38977599143981934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,128,1,fp8,fp8,0,0.16757333278656006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,fp8,0,0.3996373414993286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,0,1,fp8,fp8,0,0.36407466729482013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,float16,0,0.1874026656150818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,float16,0,0.4048159917195638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,128,1,fp8,fp8,0,0.1750613252321879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,fp8,0,0.40754131476084393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,float16,0,0.1774079998334249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,float16,0,0.11375466982523601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,float16,0,0.23387199640274048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,fp8,0,0.11541333794593811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,fp8,0,0.18921599785486856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,fp8,0,0.2368746598561605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,fp8,0,0.18258132537206015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,0,1,fp8,fp8,0,0.3792693217595418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,0,1,fp8,fp8,0,0.22010666131973267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,float16,0,0.10151466727256775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,float16,0,0.22150399287541708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,fp8,0,0.10145066181818645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,128,1,fp8,fp8,0,0.09107733766237895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,fp8,0,0.222378671169281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,0,1,fp8,fp8,0,0.20146133502324423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,float16,0,0.1013813316822052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,float16,0,0.22213866313298544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,fp8,0,0.10291199882825215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,128,1,fp8,fp8,0,0.09223467111587524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,fp8,0,0.2244373361269633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,0,1,fp8,fp8,0,0.20168532927831015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,float16,0,0.10424000024795532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,float16,0,0.224400003751119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,fp8,0,0.10752532879511516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,128,1,fp8,fp8,0,0.09731733798980713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,fp8,0,0.2260319987932841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,0,1,fp8,fp8,0,0.2058346668879191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,float16,0,0.06785599887371063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,float16,0,0.1402186652024587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,128,1,fp8,fp8,0,0.0642986645301183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,fp8,0,0.14286933342615762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,0,1,fp8,fp8,0,0.1329813301563263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,float16,0,0.06423466900984447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,float16,0,0.13874666889508566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,128,1,fp8,fp8,0,0.05843733251094818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,fp8,0,0.1385546624660492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,128,1,fp8,fp8,0,0.1114026705423991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,fp8,0,0.06878399848937988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,float16,0,0.13926933209101358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,fp8,0,0.06461333235104878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,128,1,fp8,fp8,0,0.058186665177345276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,fp8,0,0.06411199768384297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,fp8,0,0.13844266533851624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,0,1,fp8,fp8,0,0.12575466434160867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,float16,0,0.06414400041103363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,float16,0,0.13926399747530618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,128,1,fp8,fp8,0,0.06016000111897787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,fp8,0,0.13901333014170328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,0,1,fp8,fp8,0,0.12595199545224509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,float16,0,0.05403199791908264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,float16,0,0.1013759970664978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,fp8,0,0.05593066910902659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,128,1,fp8,fp8,0,0.05197866757710775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,fp8,0,0.0645546664794286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,0,1,fp8,fp8,0,0.0946666697661082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,float16,0,0.05565333366394043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,float16,0,0.10170666376749675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,fp8,0,0.056133334835370384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,128,1,fp8,fp8,0,0.05206400156021118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,fp8,0,0.10114666819572449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,0,1,fp8,fp8,0,0.093941330909729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,float16,0,0.05572799841562907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,float16,0,0.10108266274134318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,fp8,0,0.05392533540725708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,128,1,fp8,fp8,0,0.05222400029500326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,fp8,0,0.10101333260536194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,0,1,fp8,fp8,0,0.09403199950853984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,float16,0,0.05592533449331919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,float16,0,0.1015786627928416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,fp8,0,0.05406933526198069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,128,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,fp8,0,0.10228266318639119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,0,1,fp8,fp8,0,0.09354133407274882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,float16,0,0.49983465671539307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,float16,0,0.9240372975667318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,fp8,0,0.503605326016744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,128,1,fp8,fp8,0,0.4468640089035034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,fp8,0,0.9299413363138834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,fp8,0,0.10108799735705058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,0,1,fp8,fp8,0,0.8310453097025553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,float16,0,0.5078506469726562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,fp8,0,0.5114986499150594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,float16,0,0.9301813443501791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,128,1,fp8,fp8,0,0.4556853373845418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,fp8,0,0.9366933504740397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,0,1,fp8,fp8,0,0.12548800309499106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,0,1,fp8,fp8,0,0.8392746448516846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,float16,0,0.5200373331705729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,fp8,0,0.5222293138504028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,128,1,fp8,fp8,0,0.4716426531473796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,fp8,0,0.9495466550191244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,0,1,fp8,fp8,0,0.8549493153889974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,float16,0,0.2940266728401184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,float16,0,0.06428800026575725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,float16,0,0.5171626806259155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,fp8,0,0.298581341902415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,128,1,fp8,fp8,0,0.2720853288968404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,fp8,0,0.5219839811325073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,float16,0,0.9473919868469238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,fp8,0,0.25914132595062256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,128,1,fp8,fp8,0,0.23451733589172363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,fp8,0,0.4805706739425659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,0,1,fp8,fp8,0,0.43398932615915936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,0,1,fp8,fp8,0,0.47404801845550537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,float16,0,0.25734400749206543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,float16,0,0.49088001251220703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,float16,0,0.2619626720746358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,128,1,fp8,fp8,0,0.2403200070063273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,fp8,0,0.48478933175404865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,0,1,fp8,fp8,0,0.4395039876302083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,float16,0,0.26916799942652386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,float16,0,0.4910453160603841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,fp8,0,0.2634933392206828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,fp8,0,0.2731413245201111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,128,1,fp8,fp8,0,0.24679466088612875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,float16,0,0.1567306617895762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,float16,0,0.2777973413467407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,fp8,0,0.1591253379980723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,128,1,fp8,fp8,0,0.14866666992505392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,fp8,0,0.2818880081176758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,0,1,fp8,fp8,0,0.25941334168116253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,float16,0,0.1325706640879313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,0,1,fp8,fp8,0,0.4469653367996216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,float16,0,0.25438400109608966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,128,1,fp8,fp8,0,0.12363732854525249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,fp8,0,0.25496000051498413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,0,1,fp8,fp8,0,0.23441600799560547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,float16,0,0.4816853205362956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,float16,0,0.13596799969673157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,float16,0,0.25650133689244586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,fp8,0,0.13804266850153604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,128,1,fp8,fp8,0,0.12811733285586038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,fp8,0,0.25894399483998615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,fp8,0,0.1341386636098226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,float16,0,0.141866664091746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,fp8,0,0.144186665614446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,128,1,fp8,fp8,0,0.1358453333377838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,fp8,0,0.2646933396657308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,0,1,fp8,fp8,0,0.2379573384920756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,float16,0,0.08508800466855367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,float16,0,0.1564640005429586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,fp8,0,0.08710400263468425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,128,1,fp8,fp8,0,0.08534399668375652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,fp8,0,0.16049066185951233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,0,1,fp8,fp8,0,0.150026669104894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,fp8,0,0.4942293167114258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,float16,0,0.07692266503969829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,float16,0,0.14920533696810404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,fp8,0,0.07694399853547414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,128,1,fp8,fp8,0,0.07037333150704701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,fp8,0,0.1492959956328074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,0,1,fp8,fp8,0,0.13589866956075033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,float16,0,0.07659199833869934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,float16,0,0.26107199986775714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,fp8,0,0.07712000111738841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,128,1,fp8,fp8,0,0.0724426656961441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,fp8,0,0.1504853367805481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,0,1,fp8,fp8,0,0.13611732920010886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,float16,0,0.07869866490364075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,float16,0,0.15057067076365152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,128,1,fp8,fp8,0,0.07456533114115398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,0,1,fp8,fp8,0,0.24446932474772134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,fp8,0,0.15084266662597656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,0,1,fp8,fp8,0,0.13820266723632812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,float16,0,0.05233600238958994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,float16,0,0.09941866993904114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,float16,0,0.15041599671045938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,fp8,0,0.10150933265686035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,0,1,fp8,fp8,0,0.0929813285668691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,float16,0,0.05028266708056132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,fp8,0,0.08076266447703044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,float16,0,0.09914666414260864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,128,1,fp8,fp8,0,0.04791999856630961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,fp8,0,0.0992693305015564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,0,1,fp8,fp8,0,0.08912533521652222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,float16,0,0.04984533290068308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,float16,0,0.09865066409111023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,fp8,0,0.04970666766166687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,128,1,fp8,fp8,0,0.047685335079828896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,fp8,0,0.09946133693059285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,0,1,fp8,fp8,0,0.09092799822489421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,float16,0,0.05109866460164388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,float16,0,0.09929066896438599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,fp8,0,0.05054399867852529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,128,1,fp8,fp8,0,0.047882666190465294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,fp8,0,0.10006933410962422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,0,1,fp8,fp8,0,0.09080533186594646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,float16,0,0.04172799984614054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,float16,0,0.06985066831111908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,128,1,fp8,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,fp8,0,0.07063999772071838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,0,1,fp8,fp8,0,0.06617600222428639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,float16,0,0.04186666508515676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,float16,0,0.07051200171311696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,fp8,0,0.04156800111134847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,fp8,0,0.05013866722583771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,128,1,fp8,fp8,0,0.0399893323580424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,fp8,0,0.07012266914049785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,0,1,fp8,fp8,0,0.06454933186372121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,float16,0,0.040805332362651825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,float16,0,0.07050666709740956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,fp8,0,0.041573333243529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,128,1,fp8,fp8,0,0.04127466678619385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,fp8,0,0.0705386648575465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,0,1,fp8,fp8,0,0.06481066842873891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,float16,0,0.04046933352947235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,float16,0,0.06896000107129414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,128,1,fp8,fp8,0,0.039461334546407066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,fp8,0,0.07069866855939229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,0,1,fp8,fp8,0,0.06613866488138835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,128,1,fp8,fp8,0,0.050154666105906166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,float16,0,0.6945119698842367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,float16,0,1.0271999835968018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,128,1,fp8,fp8,0,0.6102826595306396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,fp8,0,1.028714656829834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,float16,0,0.7111360232035319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,fp8,0,0.6941760381062826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,fp8,0,0.7096319993336996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,128,1,fp8,fp8,0,0.6277120113372803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,0,1,fp8,fp8,0,0.9078026612599691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,fp8,0,1.0423093636830647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,0,1,fp8,fp8,0,0.9110079606374105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,float16,0,0.7286453247070312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,float16,0,1.0601279735565186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,fp8,0,0.7271786530812582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,128,1,fp8,fp8,0,0.6385706663131714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,float16,0,0.39554667472839355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,0,1,fp8,fp8,0,0.9361279805501302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,float16,0,0.5701280037562052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,fp8,0,0.3956000010172526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,128,1,fp8,fp8,0,0.3648746808369954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,fp8,0,0.5690666834513346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,0,1,fp8,fp8,0,0.5185813506444296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,float16,0,1.0438026587168376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,float16,0,0.5150826772054037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,128,1,fp8,fp8,0,0.30939199527104694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,fp8,0,0.5170826514561971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,float16,0,0.34443199634552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,0,1,fp8,fp8,0,0.46352001031239826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,fp8,0,0.3463199933369954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,float16,0,0.5226986805597941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,fp8,0,0.3531786600748698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,128,1,fp8,fp8,0,0.315013329188029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,fp8,0,0.5242453416188558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,0,1,fp8,fp8,0,0.46887465318044025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,float16,0,0.36192532380421955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,float16,0,0.5308053493499756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,fp8,0,0.3633333444595337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,fp8,0,1.057967980702718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,fp8,0,0.534554680188497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,0,1,fp8,fp8,0,0.4803786675135295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,float16,0,0.20385066668192545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,float16,0,0.2967946728070577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,128,1,fp8,fp8,0,0.19107733170191446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,fp8,0,0.29942933718363446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,0,1,fp8,fp8,0,0.2731039921442668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,float16,0,0.34992531935373944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,float16,0,0.17369065682093301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,float16,0,0.26524267594019574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,fp8,0,0.17705066998799643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,128,1,fp8,fp8,0,0.1625226636727651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,fp8,0,0.2672320008277893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,0,1,fp8,fp8,0,0.24512000878651938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,float16,0,0.17880533138910928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,float16,0,0.269322673479716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,fp8,0,0.1797813375790914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,128,1,fp8,fp8,0,0.16689600547154745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,fp8,0,0.27007999022801715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,0,1,fp8,fp8,0,0.24901866912841797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,float16,0,0.18620266517003378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,float16,0,0.27694400151570636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,fp8,0,0.18718934059143066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,128,1,fp8,fp8,0,0.17286400000254312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,fp8,0,0.27821866671244305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,fp8,0,0.20790932575861612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,0,1,fp8,fp8,0,0.25485867261886597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,float16,0,0.1629759967327118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,fp8,0,0.11145066221555074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,128,1,fp8,fp8,0,0.10804266730944316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,fp8,0,0.1646506687005361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,0,1,fp8,fp8,0,0.15434666474660239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,float16,0,0.09578133622805278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,128,1,fp8,fp8,0,0.3247680068016052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,fp8,0,0.09714667002360027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,128,1,fp8,fp8,0,0.0869599978129069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,fp8,0,0.146997332572937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,0,1,fp8,fp8,0,0.13235732913017273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,float16,0,0.09571733077367146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,float16,0,0.1486133337020874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,float16,0,0.11059733231862386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,fp8,0,0.1502133309841156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,0,1,fp8,fp8,0,0.13382933537165323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,float16,0,0.09789333740870158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,float16,0,0.14804266889890036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,float16,0,0.1504693329334259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,fp8,0,0.10084799925486247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,128,1,fp8,fp8,0,0.09296000003814697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,fp8,0,0.15200000007947287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,0,1,fp8,fp8,0,0.1385706663131714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,float16,0,0.06344533463319142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,float16,0,0.09507200121879578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,128,1,fp8,fp8,0,0.08794666330019633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,128,1,fp8,fp8,0,0.06029866635799408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,fp8,0,0.0962506632010142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,0,1,fp8,fp8,0,0.09058133761088054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,float16,0,0.058592001597086586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,float16,0,0.09106666843096416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,fp8,0,0.05843733251094818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,128,1,fp8,fp8,0,0.053802669048309326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,fp8,0,0.099263995885849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,fp8,0,0.09108266234397888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,fp8,0,0.0658133327960968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,float16,0,0.058277333776156105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,float16,0,0.09136000275611877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,fp8,0,0.060559997955958046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,128,1,fp8,fp8,0,0.05415999889373779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,fp8,0,0.09318932890892029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,0,1,fp8,fp8,0,0.08259200056393941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,float16,0,0.06044266621271769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,float16,0,0.09185066819190979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,fp8,0,0.06010666489601135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,128,1,fp8,fp8,0,0.05622933308283488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,fp8,0,0.09317333499590556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,0,1,fp8,fp8,0,0.08505599697430928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,float16,0,0.04377600053946177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,float16,0,0.06659199794133504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,fp8,0,0.043824002146720886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,128,1,fp8,fp8,0,0.04192000130812327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,fp8,0,0.06618133187294006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,0,1,fp8,fp8,0,0.06002133091290792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,float16,0,0.04358399907747904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,fp8,0,0.04203199843565623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,128,1,fp8,fp8,0,0.0396373321612676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,fp8,0,0.06420266628265381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,0,1,fp8,fp8,0,0.05641066531340281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,float16,0,0.041738669077555336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,float16,0,0.06379733482996623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,fp8,0,0.043882668018341064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,128,1,fp8,fp8,0,0.03997866561015447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,fp8,0,0.06532800197601318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,0,1,fp8,fp8,0,0.05825600028038025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,float16,0,0.04338666796684265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,0,1,fp8,fp8,0,0.08264000217119853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,fp8,0,0.043493335445721946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,128,1,fp8,fp8,0,0.04061333338419596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,fp8,0,0.06643199920654297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,0,1,fp8,fp8,0,0.058037335673967995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,float16,0,0.0347680002450943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,float16,0,0.05049600203831991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,128,1,fp8,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,fp8,0,0.05232533315817515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,0,1,fp8,fp8,0,0.04814399778842926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,float16,0,0.03350933392842611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,float16,0,0.050213331977526345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,128,1,fp8,fp8,0,0.033402666449546814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,float16,0,0.0645653357108434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,0,1,fp8,fp8,0,0.048357332746187844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,float16,0,0.03362133353948593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,float16,0,0.05076266825199127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,128,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,fp8,0,0.05205333232879639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,0,1,fp8,fp8,0,0.04794666667779287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,float16,0,0.03364799916744232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,float16,0,0.05173333485921224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,128,1,fp8,fp8,0,0.03195200115442276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,fp8,0,0.05203199883302053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,0,1,fp8,fp8,0,0.04824000100294749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,fp8,0,0.033557333052158356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,float16,0,0.49538131554921466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,fp8,0,0.052255998055140175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,float16,0,0.6593386729558309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,fp8,0,0.4978880087534587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,float16,0,0.06305600206057231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,fp8,0,0.6631040175755819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,0,1,fp8,fp8,0,0.5888746579488119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,float16,0,0.5095946788787842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,float16,0,0.6764640013376871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,128,1,fp8,fp8,0,0.45368532339731854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,fp8,0,0.6873546441396078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,0,1,fp8,fp8,0,0.6016960144042969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,128,1,fp8,fp8,0,0.4420479933420817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,float16,0,0.5233013232549032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,float16,0,0.6890453497568766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,128,1,fp8,fp8,0,0.46634666124979657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,fp8,0,0.5127786795298258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,fp8,0,0.6920639673868815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,float16,0,0.2916693290074666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,float16,0,0.3796000083287557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,fp8,0,0.2984000047047933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,128,1,fp8,fp8,0,0.2734346588452657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,fp8,0,0.38605332374572754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,0,1,fp8,fp8,0,0.3510293165842692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,fp8,0,0.5257013241449991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,float16,0,0.34071465333302814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,128,1,fp8,fp8,0,0.23116799195607504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,fp8,0,0.34303998947143555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,0,1,fp8,fp8,0,0.30796800057093304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,float16,0,0.2566346724828084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,float16,0,0.25300800800323486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,float16,0,0.34355199337005615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,fp8,0,0.26031466325124103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,0,1,fp8,fp8,0,0.6142026583353678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,128,1,fp8,fp8,0,0.23748266696929932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,fp8,0,0.3466293414433797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,float16,0,0.26764265696207684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,float16,0,0.35391465822855633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,fp8,0,0.2554080088933309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,128,1,fp8,fp8,0,0.2433333396911621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,0,1,fp8,fp8,0,0.3150613307952881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,0,1,fp8,fp8,0,0.32073066631952923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,float16,0,0.15542933344841003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,float16,0,0.20399999618530273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,fp8,0,0.15901866555213928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,fp8,0,0.20660799741744995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,fp8,0,0.3577280044555664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,float16,0,0.1329813301563263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,float16,0,0.1786293387413025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,fp8,0,0.13286933302879333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,fp8,0,0.26974932352701825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,128,1,fp8,fp8,0,0.12050132950146993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,fp8,0,0.18141865730285645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,float16,0,0.13370133439699808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,0,1,fp8,fp8,0,0.19141866763432822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,fp8,0,0.1363200048605601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,128,1,fp8,fp8,0,0.12609066565831503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,fp8,0,0.18450667460759482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,0,1,fp8,fp8,0,0.16714666287104288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,128,1,fp8,fp8,0,0.1492693324883779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,float16,0,0.18779200315475464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,float16,0,0.18183465798695883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,fp8,0,0.1418293317159017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,128,1,fp8,fp8,0,0.13221866885821024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,fp8,0,0.18955733378728232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,0,1,fp8,fp8,0,0.1755146582921346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,float16,0,0.08193066716194153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,float16,0,0.11160000165303548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,fp8,0,0.08556266625722249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,128,1,fp8,fp8,0,0.08371200164159139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,fp8,0,0.11316800117492676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,0,1,fp8,fp8,0,0.16286399960517883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,float16,0,0.07425066828727722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,float16,0,0.10265066226323445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,fp8,0,0.07588266829649608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,128,1,fp8,fp8,0,0.06860266625881195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,fp8,0,0.1037013332049052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,0,1,fp8,fp8,0,0.09308266639709473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,float16,0,0.07500266532103221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,float16,0,0.1030453344186147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,fp8,0,0.07585066556930542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,128,1,fp8,fp8,0,0.06852266689141591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,fp8,0,0.10546666383743286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,0,1,fp8,fp8,0,0.09316800038019817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,float16,0,0.07704000174999237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,float16,0,0.1034986674785614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,fp8,0,0.078575998544693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,128,1,fp8,fp8,0,0.0706826647122701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,fp8,0,0.105813334385554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,0,1,fp8,fp8,0,0.09634666641553243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,float16,0,0.04786666731039683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,float16,0,0.07026666899522145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,fp8,0,0.04977599779764811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,128,1,fp8,fp8,0,0.04673600196838379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,fp8,0,0.07056533296902974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,0,1,fp8,fp8,0,0.06459199885527293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,float16,0,0.045909335215886436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,float16,0,0.06651733318964641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,128,1,fp8,fp8,0,0.0436106671889623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,fp8,0,0.06754666566848755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,float16,0,0.13757866621017456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,0,1,fp8,fp8,0,0.061717331409454346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,float16,0,0.04587199787298838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,float16,0,0.06654400130112965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,fp8,0,0.04660800099372864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,128,1,fp8,fp8,0,0.04340266684691111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,fp8,0,0.06852266689141591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,0,1,fp8,fp8,0,0.06089599927266439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,float16,0,0.047770669062932335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,float16,0,0.06718933085600536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,fp8,0,0.04804266492525736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,128,1,fp8,fp8,0,0.04568000137805939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,fp8,0,0.06844800213972728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,0,1,fp8,fp8,0,0.06257066627343495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,float16,0,0.033359999457995095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,float16,0,0.045706664522488914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,fp8,0,0.033359999457995095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,128,1,fp8,fp8,0,0.03369600077470144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,0,1,fp8,fp8,0,0.04203199843565623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,float16,0,0.03232000023126602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,float16,0,0.04371733466784159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,128,1,fp8,fp8,0,0.031221332649389904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,fp8,0,0.04394666850566864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,float16,0,0.03159466634194056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,float16,0,0.043696001172065735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,fp8,0,0.03148266673088074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,128,1,fp8,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,fp8,0,0.044138665000597634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,0,1,fp8,fp8,0,0.041706666350364685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,float16,0,0.03323200096686681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,float16,0,0.043951998154322304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,fp8,0,0.03190399954716364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,128,1,fp8,fp8,0,0.03193599979082743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,fp8,0,0.04606399933497111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,0,1,fp8,fp8,0,0.041840001940727234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,float16,0,0.031167998909950256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,fp8,0,0.04607999821503957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,float16,0,0.042037333051363625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,128,1,fp8,fp8,0,0.029077333708604176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,0,1,fp8,fp8,0,0.1077280044555664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,0,1,fp8,fp8,0,0.03995733211437861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,float16,0,0.029653333127498627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,float16,0,0.04188799858093262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,fp8,0,0.02980799973011017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,128,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,0,1,fp8,fp8,0,0.03945599993069967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,float16,0,0.031199999153614044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,float16,0,0.041759997606277466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,128,1,fp8,fp8,0,0.02961066613594691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,fp8,0,0.04188799858093262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,0,1,fp8,fp8,0,0.04014399896065394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,float16,0,0.029535998900731403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,float16,0,0.04227200150489807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,fp8,0,0.04228266576925913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,fp8,0,0.04381866753101349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,0,1,fp8,fp8,0,0.039887999494870506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,float16,0,0.6873066425323486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,fp8,0,0.5769973198572794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,128,1,fp8,fp8,0,0.520794669787089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,fp8,0,0.6847039858500162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,128,1,fp8,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,float16,0,0.5932746728261312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,float16,0,0.6985066731770834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,float16,0,0.5794399976730347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,128,1,fp8,fp8,0,0.5492586692174276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,fp8,0,0.698357343673706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,0,1,fp8,fp8,0,0.6428266763687134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,float16,0,0.5990613301595052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,float16,0,0.705952008565267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,fp8,0,0.5905599991480509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,128,1,fp8,fp8,0,0.5536853472391764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,fp8,0,0.6998986403147379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,0,1,fp8,fp8,0,0.6504960060119629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,float16,0,0.38712533315022785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,fp8,0,0.3221759994824727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,128,1,fp8,fp8,0,0.3098133405049642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,fp8,0,0.3805973529815674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,float16,0,0.3288266658782959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,0,1,fp8,fp8,0,0.36137600739796955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,float16,0,0.29871465762456256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,float16,0,0.3540159861246745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,fp8,0,0.2983893354733785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,fp8,0,0.594597339630127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,128,1,fp8,fp8,0,0.2698773344357808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,fp8,0,0.35520001252492267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,0,1,fp8,fp8,0,0.3190666635831197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,float16,0,0.30371199051539105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,float16,0,0.36191999912261963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,fp8,0,0.3025599916776021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,0,1,fp8,fp8,0,0.6133866707483927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,fp8,0,0.36110401153564453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,0,1,fp8,fp8,0,0.33400531609853107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,float16,0,0.3069546620051066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,float16,0,0.3638613224029541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,fp8,0,0.3058346708615621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,128,1,fp8,fp8,0,0.2897973259290059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,fp8,0,0.3613813320795695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,0,1,fp8,fp8,0,0.336575984954834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,float16,0,0.20338133970896402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,128,1,fp8,fp8,0,0.16478932897249857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,128,1,fp8,fp8,0,0.285098671913147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,fp8,0,0.20138667027155557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,0,1,fp8,fp8,0,0.19171200195948282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,float16,0,0.15867732961972555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,float16,0,0.18691200017929077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,fp8,0,0.1586186687151591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,128,1,fp8,fp8,0,0.14458133776982626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,fp8,0,0.18716265757878622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,0,1,fp8,fp8,0,0.17172267039616904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,float16,0,0.16064533591270447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,float16,0,0.19047466913859049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,fp8,0,0.1609440048535665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,128,1,fp8,fp8,0,0.15080533425013223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,fp8,0,0.18994667132695517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,0,1,fp8,fp8,0,0.17654933532079062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,float16,0,0.1629866659641266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,float16,0,0.19173866510391235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,fp8,0,0.16291200121243796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,128,1,fp8,fp8,0,0.15466666221618652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,fp8,0,0.19152534008026123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,0,1,fp8,fp8,0,0.18157333135604858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,float16,0,0.09589333335558574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,float16,0,0.17476266622543335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,fp8,0,0.09507200121879578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,128,1,fp8,fp8,0,0.09346666932106018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,fp8,0,0.17301867405573526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,0,1,fp8,fp8,0,0.11015466849009196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,float16,0,0.0883626639842987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,float16,0,0.10571199655532837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,fp8,0,0.08832533160845439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,128,1,fp8,fp8,0,0.07918933530648549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,fp8,0,0.10586133599281311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,float16,0,0.11431466539700826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,0,1,fp8,fp8,0,0.09477333227793376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,float16,0,0.0892639954884847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,float16,0,0.10548800230026245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,fp8,0,0.08914666374524434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,128,1,fp8,fp8,0,0.08029333253701527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,fp8,0,0.10638399918874104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,0,1,fp8,fp8,0,0.09526399771372478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,float16,0,0.08995200196901958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,float16,0,0.10578133662541707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,fp8,0,0.0900320013364156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,128,1,fp8,fp8,0,0.0828906645377477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,fp8,0,0.10790399710337321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,0,1,fp8,fp8,0,0.09877333045005798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,float16,0,0.05614933371543884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,float16,0,0.0666133314371109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,fp8,0,0.05538133283456167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,fp8,0,0.06597333153088887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,0,1,fp8,fp8,0,0.06237333516279856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,float16,0,0.05297600229581197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,float16,0,0.06468266745408376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,128,1,fp8,fp8,0,0.04985600213209788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,fp8,0,0.06509866813818614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,0,1,fp8,fp8,0,0.06054399907588959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,float16,0,0.054144000013669334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,float16,0,0.06537599861621857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,fp8,0,0.05356266597906748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,128,1,fp8,fp8,0,0.04965866605440775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,fp8,0,0.06477866570154826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,0,1,fp8,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,float16,0,0.05393599967161814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,float16,0,0.06548266609509786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,fp8,0,0.05426666637261709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,128,1,fp8,fp8,0,0.05170666674772898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,fp8,0,0.11316266655921936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,0,1,fp8,fp8,0,0.06033066908518473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,float16,0,0.039818666875362396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,fp8,0,0.05459199845790863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,128,1,fp8,fp8,0,0.03809600075085958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,fp8,0,0.04632000128428141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,0,1,fp8,fp8,0,0.04460800190766653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,float16,0,0.03892799963553747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,float16,0,0.04450133442878723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,fp8,0,0.03845866769552231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,fp8,0,0.06620266536871593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,128,1,fp8,fp8,0,0.03663466622432073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,fp8,0,0.045610666275024414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,0,1,fp8,fp8,0,0.04339733223120371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,float16,0,0.03772266705830892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,float16,0,0.04588800172011057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,128,1,fp8,fp8,0,0.03828266759713491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,fp8,0,0.0454773356517156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,0,1,fp8,fp8,0,0.04279999931653341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,float16,0,0.03773866593837738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,float16,0,0.04587733248869578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,fp8,0,0.039306665460268654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,128,1,fp8,fp8,0,0.03758399933576584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,fp8,0,0.03948266555865606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,0,1,fp8,fp8,0,0.043391997615496315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,float16,0,0.03381866713364919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,128,1,fp8,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,fp8,0,0.033802665770053864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,0,1,fp8,fp8,0,0.03133333226044973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,float16,0,0.025498665869235992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,float16,0,0.04735999802748362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,float16,0,0.03165333221356074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,128,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,0,1,fp8,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,float16,0,0.0335359995563825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,128,1,fp8,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,0,1,fp8,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,float16,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,float16,0,0.03375466664632162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,128,1,fp8,fp8,0,0.027424000203609467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,fp8,0,0.03299733251333237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,0,1,fp8,fp8,0,0.03265066693226496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,float16,0,0.023397333920001984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,fp8,0,0.025770666698614757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,fp8,0,0.029711998999118805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,0,1,fp8,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,float16,0,0.029088000456492107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,fp8,0,0.0220320001244545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,128,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,0,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,float16,0,0.029717333614826202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,fp8,0,0.02181866765022278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,128,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,128,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,fp8,0,0.028399998943010967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,0,1,fp8,fp8,0,0.026842666169007618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,float16,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,float16,0,0.028949332733949024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,128,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,fp8,0,0.02943466603755951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,0,1,fp8,fp8,0,0.02741866558790207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,128,1,float16,float16,0,0.5620959997177124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,0,1,float16,float16,0,0.5684373378753662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,128,1,float16,fp8,0,0.5590506792068481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,0,1,float16,fp8,0,0.5686560074488322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,0,1,fp8,fp8,0,0.505242665608724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,128,1,float16,float16,0,0.5735093355178833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,128,1,float16,fp8,0,0.5717173417409261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,128,1,fp8,fp8,0,0.5290079911549886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,0,1,float16,fp8,0,0.580074667930603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,0,1,fp8,fp8,0,0.5358239809672037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,128,1,float16,float16,0,0.5792906681696574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,0,1,float16,float16,0,0.589194655418396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,128,1,float16,fp8,0,0.5752853155136108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,128,1,fp8,fp8,0,0.5337813297907511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,0,1,float16,fp8,0,0.5838079849878947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,128,1,float16,float16,0,0.318832000096639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,0,1,float16,float16,0,0.32501333951950073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,0,1,float16,float16,0,0.5837173461914062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,128,1,float16,fp8,0,0.31309332450230914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,128,1,fp8,fp8,0,0.2988266746203105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,0,1,float16,fp8,0,0.31961599985758465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,128,1,fp8,fp8,0,0.5011306603749593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,128,1,float16,float16,0,0.29064534107844037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,0,1,fp8,fp8,0,0.537551999092102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,128,1,float16,fp8,0,0.2888159950574239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,128,1,fp8,fp8,0,0.25912533203760785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,0,1,float16,fp8,0,0.2946666677792867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,0,1,fp8,fp8,0,0.2632426619529724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,128,1,float16,float16,0,0.29654399553934735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,0,1,float16,float16,0,0.3007733424504598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,128,1,float16,fp8,0,0.29561599095662433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,0,1,fp8,fp8,0,0.30318933725357056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,0,1,float16,fp8,0,0.2998826702435811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,0,1,float16,float16,0,0.2945866584777832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,128,1,float16,float16,0,0.2976800004641215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,0,1,float16,float16,0,0.3019733428955078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,128,1,float16,fp8,0,0.29575467109680176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,128,1,fp8,fp8,0,0.27587199211120605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,0,1,float16,fp8,0,0.30024532477060956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,0,1,fp8,fp8,0,0.28065067529678345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,128,1,float16,float16,0,0.16923733552296957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,0,1,float16,float16,0,0.17212265729904175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,128,1,float16,fp8,0,0.16684265931447348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,128,1,fp8,fp8,0,0.16008533040682474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,0,1,float16,fp8,0,0.1698346734046936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,0,1,fp8,fp8,0,0.1618933379650116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,128,1,float16,float16,0,0.153957337141037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,0,1,float16,float16,0,0.15544000267982483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,128,1,float16,fp8,0,0.15424000223477682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,128,1,fp8,fp8,0,0.13915200034777322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,0,1,float16,fp8,0,0.1564959983030955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,0,1,fp8,fp8,0,0.14070933063824972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,128,1,float16,float16,0,0.15530666708946228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,0,1,float16,float16,0,0.15866133570671082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,128,1,float16,fp8,0,0.15526400009791055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,128,1,fp8,fp8,0,0.274234672387441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,128,1,fp8,fp8,0,0.14589866995811462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,0,1,float16,fp8,0,0.15863466262817383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,0,1,fp8,fp8,0,0.14539200067520142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,128,1,float16,float16,0,0.15705066919326782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,0,1,float16,float16,0,0.16110400358835855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,128,1,float16,fp8,0,0.1569386621316274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,128,1,fp8,fp8,0,0.14874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,0,1,float16,fp8,0,0.15918399890263876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,0,1,fp8,fp8,0,0.1488640010356903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,0,1,float16,float16,0,0.09496000409126282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,128,1,float16,fp8,0,0.09318400422732036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,128,1,fp8,fp8,0,0.09110933542251587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,0,1,float16,fp8,0,0.09472533067067464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,0,1,fp8,fp8,0,0.09211200475692749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,128,1,float16,float16,0,0.08663466572761536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,0,1,float16,float16,0,0.08701333403587341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,128,1,float16,fp8,0,0.08710933725039165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,128,1,fp8,fp8,0,0.07748800019423167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,0,1,float16,fp8,0,0.0865226686000824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,0,1,fp8,fp8,0,0.0788800021012624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,0,1,fp8,fp8,0,0.27742934226989746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,0,1,float16,float16,0,0.08906666437784831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,128,1,float16,fp8,0,0.08692800005276997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,128,1,fp8,fp8,0,0.07766399780909221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,0,1,float16,fp8,0,0.08683733145395915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,0,1,fp8,fp8,0,0.07846400141716003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,128,1,float16,float16,0,0.08754133184750874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,0,1,float16,float16,0,0.08905599514643352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,128,1,float16,fp8,0,0.08730133374532063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,128,1,fp8,fp8,0,0.0821973333756129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,0,1,float16,fp8,0,0.0890933374563853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,128,1,float16,float16,0,0.08690667152404785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,128,1,float16,float16,0,0.054005334774653115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,0,1,float16,float16,0,0.05533866584300995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,128,1,float16,fp8,0,0.05433600147565206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,128,1,fp8,fp8,0,0.05227200190226237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,0,1,float16,fp8,0,0.055248002211252846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,0,1,fp8,fp8,0,0.05308799942334493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,128,1,float16,float16,0,0.052842666705449425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,0,1,float16,float16,0,0.05413866539796194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,0,1,fp8,fp8,0,0.08258666594823201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,128,1,float16,fp8,0,0.052842666705449425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,128,1,fp8,fp8,0,0.04806933303674062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,128,1,float16,float16,0,0.09516800443331401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,128,1,float16,float16,0,0.05258133510748545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,0,1,float16,float16,0,0.05399466554323832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,128,1,float16,fp8,0,0.05442133545875549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,128,1,fp8,fp8,0,0.04916800061861674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,0,1,float16,fp8,0,0.055642664432525635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,128,1,float16,float16,0,0.053616002202034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,0,1,float16,float16,0,0.05500266452630361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,128,1,float16,fp8,0,0.05324266850948334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,0,1,float16,fp8,0,0.05429866909980774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,128,1,fp8,fp8,0,0.04987200101216634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,0,1,float16,fp8,0,0.0554613322019577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,0,1,fp8,fp8,0,0.05109333495299021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,0,1,float16,float16,0,0.03898133337497711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,128,1,float16,fp8,0,0.038831998904546104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,128,1,fp8,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,0,1,float16,fp8,0,0.03828799972931544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,0,1,fp8,fp8,0,0.03606399893760681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,128,1,float16,float16,0,0.03602133442958196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,0,1,float16,float16,0,0.036389333506425224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,0,1,fp8,fp8,0,0.04967466493447622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,128,1,fp8,fp8,0,0.03509333233038584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,0,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,0,1,fp8,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,128,1,float16,float16,0,0.03753600021203359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,0,1,float16,float16,0,0.03758399933576584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,128,1,float16,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,128,1,fp8,fp8,0,0.035946667194366455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,0,1,float16,fp8,0,0.0383093332250913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,0,1,fp8,fp8,0,0.036271999279658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,128,1,float16,fp8,0,0.03781333317359289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,128,1,float16,float16,0,0.03737599899371465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,0,1,float16,float16,0,0.03800000001986822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,128,1,float16,fp8,0,0.03786666691303253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,128,1,fp8,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,0,1,float16,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,0,1,fp8,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,128,1,float16,float16,0,0.027376001079877216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,0,1,fp8,fp8,0,0.050213331977526345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,128,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,128,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,0,1,float16,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,0,1,fp8,fp8,0,0.027632000545660656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,128,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,0,1,float16,float16,0,0.02741866558790207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,128,1,float16,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,128,1,fp8,fp8,0,0.025637333591779072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,0,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,0,1,fp8,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,128,1,float16,float16,0,0.025674665967623394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,0,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,128,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,128,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,0,1,float16,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,0,1,fp8,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,128,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,128,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,128,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,0,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,0,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,128,1,float16,float16,0,0.0230880007147789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,0,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,128,1,float16,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,0,1,float16,fp8,0,0.02476266771554947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,0,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,128,1,float16,float16,0,0.022554665803909302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,128,1,float16,float16,0,0.03765333443880081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,128,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,128,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,0,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,128,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,0,1,float16,float16,0,0.023386667172114056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,0,1,fp8,fp8,0,0.021530665457248688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,128,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,0,1,float16,float16,0,0.023386667172114056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,128,1,float16,fp8,0,0.022944000860055287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,128,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,0,1,float16,fp8,0,0.023589332898457844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,0,1,float16,float16,0,0.023525332411130268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,0,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,128,1,float16,float16,0,0.022837333381175995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,0,1,float16,float16,0,0.027471999327341717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,128,1,float16,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,128,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,0,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,128,1,float16,float16,0,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,0,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,0,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,0,1,float16,float16,0,0.021733333667119343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,0,1,float16,float16,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,128,1,float16,float16,0,0.021221332252025604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,0,1,float16,float16,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,128,1,fp8,fp8,0,0.021509334444999695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,0,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,128,1,float16,float16,0,0.27166932821273804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,128,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,0,1,float16,float16,0,0.2685439984003703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,128,1,fp8,fp8,0,0.24291733900705972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,128,1,float16,float16,0,0.022229333718617756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,0,1,fp8,fp8,0,0.23678932587305704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,128,1,float16,float16,0,0.28011733293533325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,128,1,float16,fp8,0,0.27699732780456543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,128,1,float16,fp8,0,0.27163199583689374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,128,1,fp8,fp8,0,0.25734400749206543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,0,1,float16,fp8,0,0.2659200032552083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,0,1,fp8,fp8,0,0.25179733832677204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,128,1,float16,float16,0,0.27832533915837604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,0,1,float16,float16,0,0.2739253242810567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,0,1,float16,float16,0,0.27451733748118085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,128,1,float16,fp8,0,0.2760746677716573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,128,1,fp8,fp8,0,0.2595840096473694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,0,1,float16,fp8,0,0.27138666311899823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,128,1,float16,float16,0,0.15864533185958862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,0,1,float16,float16,0,0.15658133228619894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,128,1,float16,fp8,0,0.15691199898719788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,128,1,fp8,fp8,0,0.15070399641990662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,0,1,float16,fp8,0,0.15500266353289285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,0,1,fp8,fp8,0,0.14882666865984598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,128,1,float16,float16,0,0.14620266358057657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,0,1,float16,float16,0,0.14264532923698425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,128,1,float16,fp8,0,0.1444533367951711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,128,1,fp8,fp8,0,0.12851199507713318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,0,1,float16,fp8,0,0.1418293317159017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,0,1,fp8,fp8,0,0.12585066755612692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,0,1,fp8,fp8,0,0.25497599442799884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,0,1,float16,float16,0,0.14433067043622336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,128,1,float16,fp8,0,0.1458613375822703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,0,1,float16,fp8,0,0.2738560040791829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,0,1,float16,fp8,0,0.14454933007558188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,0,1,fp8,fp8,0,0.13316266735394797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,128,1,float16,float16,0,0.14843733112017313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,0,1,float16,float16,0,0.14693333705266318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,128,1,float16,fp8,0,0.14822933077812195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,128,1,fp8,fp8,0,0.14034133156140646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,0,1,float16,fp8,0,0.14498666922251383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,0,1,fp8,fp8,0,0.1379039982954661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,128,1,float16,float16,0,0.08946133653322856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,0,1,float16,float16,0,0.08771199981371562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,128,1,float16,fp8,0,0.0867786705493927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,128,1,fp8,fp8,0,0.08668800195058186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,128,1,fp8,fp8,0,0.13660800457000732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,0,1,fp8,fp8,0,0.0849120020866394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,128,1,float16,float16,0,0.08293333152929942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,0,1,float16,float16,0,0.08085866769154866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,128,1,float16,fp8,0,0.08319466809431712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,128,1,fp8,fp8,0,0.07268799841403961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,0,1,float16,fp8,0,0.08046933511892955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,0,1,fp8,fp8,0,0.07192533214886983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,128,1,float16,float16,0,0.08292800188064575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,0,1,float16,float16,0,0.08281599978605907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,128,1,float16,fp8,0,0.08304533362388611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,128,1,float16,float16,0,0.14802133043607077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,0,1,float16,fp8,0,0.0811359981695811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,0,1,fp8,fp8,0,0.07302399973074596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,128,1,float16,float16,0,0.08291199803352356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,0,1,float16,float16,0,0.08268266419569652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,128,1,float16,fp8,0,0.08272533118724823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,128,1,fp8,fp8,0,0.0788320004940033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,0,1,float16,fp8,0,0.08144533137480418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,0,1,fp8,fp8,0,0.07594133416811626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,128,1,float16,float16,0,0.05115733544031779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,0,1,float16,float16,0,0.049925332268079124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,128,1,float16,fp8,0,0.05129600067933401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,128,1,fp8,fp8,0,0.04958933095137278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,0,1,float16,fp8,0,0.04993066688378652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,0,1,fp8,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,128,1,float16,float16,0,0.04828799764315287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,0,1,float16,float16,0,0.04811733464399973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,128,1,float16,fp8,0,0.04828266799449921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,128,1,fp8,fp8,0,0.04641599953174591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,0,1,float16,fp8,0,0.047744000951449074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,0,1,fp8,fp8,0,0.044768000642458596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,128,1,float16,float16,0,0.049786667029062905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,0,1,float16,float16,0,0.04804799954096476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,128,1,float16,fp8,0,0.048122664292653404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,128,1,fp8,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,0,1,float16,fp8,0,0.048021331429481506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,0,1,fp8,fp8,0,0.04545066754023234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,128,1,float16,float16,0,0.05021866659323374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,0,1,float16,float16,0,0.04876266419887543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,128,1,float16,fp8,0,0.04969066878159841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,128,1,fp8,fp8,0,0.04554133117198944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,0,1,float16,fp8,0,0.0480320006608963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,0,1,fp8,fp8,0,0.045653333266576133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,128,1,float16,float16,0,0.03709333389997482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,0,1,float16,float16,0,0.035375999907652535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,128,1,float16,fp8,0,0.03819733361403147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,128,1,fp8,fp8,0,0.033887999753157295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,0,1,float16,fp8,0,0.03631466627120972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,128,1,float16,float16,0,0.03554133325815201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,0,1,float16,float16,0,0.03514133393764496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,128,1,float16,fp8,0,0.035786665976047516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,128,1,fp8,fp8,0,0.0339626669883728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,128,1,fp8,fp8,0,0.07559999823570251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,0,1,float16,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,0,1,fp8,fp8,0,0.033088001112143196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,128,1,float16,float16,0,0.03603200117746989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,0,1,float16,fp8,0,0.08629866441090901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,128,1,float16,fp8,0,0.036117332677046456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,128,1,fp8,fp8,0,0.0330826664964358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,0,1,fp8,fp8,0,0.03401600072781245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,128,1,float16,float16,0,0.0352960005402565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,0,1,float16,float16,0,0.03532800078392029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,128,1,float16,fp8,0,0.035973332822322845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,128,1,fp8,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,0,1,float16,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,0,1,fp8,fp8,0,0.032730666299661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,0,1,float16,float16,0,0.03547733277082443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,0,1,float16,float16,0,0.02516799916823705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,0,1,float16,fp8,0,0.035360001027584076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,128,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,128,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,0,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,128,1,float16,float16,0,0.023605334262053173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,0,1,float16,float16,0,0.025514667232831318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,128,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,128,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,128,1,float16,float16,0,0.02569599946339925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,0,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,128,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,128,1,fp8,fp8,0,0.02458133300145467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,0,1,fp8,fp8,0,0.025008000433444977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,128,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,0,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,128,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,0,1,fp8,fp8,0,0.03213333338499069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,0,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,0,1,fp8,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,128,1,float16,float16,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,0,1,float16,float16,0,0.02160533269246419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,128,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,128,1,fp8,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,128,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,0,1,float16,float16,0,0.021509334444999695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,128,1,float16,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,128,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,128,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,128,1,float16,float16,0,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,128,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,0,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,128,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,128,1,float16,fp8,0,0.02054399996995926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,0,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,128,1,float16,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,128,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,0,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,0,1,fp8,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,128,1,float16,float16,0,0.020917333662509918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,0,1,float16,float16,0,0.020128000527620316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,128,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,128,1,float16,float16,0,0.02516799916823705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,0,1,float16,float16,0,0.02094399929046631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,128,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,128,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,0,1,float16,fp8,0,0.019637333850065868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,128,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,0,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,128,1,float16,fp8,0,0.020618667205174763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,128,1,fp8,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,128,1,float16,float16,0,0.018735999862353008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,0,1,float16,float16,0,0.019498666127522785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,0,1,fp8,fp8,0,0.017898666361967724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,0,1,float16,float16,0,0.01966933285196622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,0,1,float16,fp8,0,0.020207999895016353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,0,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,128,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,0,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,0,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,0,1,float16,float16,0,0.15218666195869446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,128,1,float16,fp8,0,0.1514240006605784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,128,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,0,1,float16,fp8,0,0.15029866496721903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,128,1,float16,float16,0,0.1508639951546987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,0,1,float16,fp8,0,0.020080000162124634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,0,1,float16,float16,0,0.15384533007939658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,128,1,fp8,fp8,0,0.13583466410636902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,128,1,float16,fp8,0,0.15307733416557312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,128,1,fp8,fp8,0,0.14214932918548584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,0,1,float16,fp8,0,0.1525973379611969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,0,1,fp8,fp8,0,0.13664000233014426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,128,1,float16,float16,0,0.15705600380897522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,128,1,float16,fp8,0,0.15472533305486044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,128,1,fp8,fp8,0,0.14806933204332987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,0,1,float16,fp8,0,0.1548426647981008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,0,1,fp8,fp8,0,0.14350400368372598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,128,1,float16,float16,0,0.09157333771387736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,0,1,float16,float16,0,0.09207466244697571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,128,1,float16,fp8,0,0.09089600046475728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,128,1,fp8,fp8,0,0.09060800075531006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,0,1,float16,fp8,0,0.09040533502896626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,0,1,fp8,fp8,0,0.08961066603660583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,0,1,fp8,fp8,0,0.14802133043607077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,128,1,float16,float16,0,0.08462400237719218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,128,1,float16,float16,0,0.1532906691233317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,128,1,float16,fp8,0,0.08489066362380981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,128,1,fp8,fp8,0,0.07500799993673961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,0,1,float16,fp8,0,0.08430400490760803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,0,1,fp8,fp8,0,0.0759093314409256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,128,1,float16,float16,0,0.08418666323026021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,0,1,float16,float16,0,0.08500267068545024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,128,1,float16,fp8,0,0.08488532900810242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,128,1,fp8,fp8,0,0.07709333300590515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,0,1,float16,fp8,0,0.0845973292986552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,0,1,fp8,fp8,0,0.07789333164691925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,128,1,float16,float16,0,0.08597333232561748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,0,1,float16,float16,0,0.08541867136955261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,128,1,float16,fp8,0,0.08517332871754964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,0,1,float16,float16,0,0.15526400009791055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,0,1,float16,fp8,0,0.08507200082143147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,0,1,fp8,fp8,0,0.08075200021266937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,128,1,float16,float16,0,0.05358933409055074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,0,1,float16,float16,0,0.052784000833829246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,128,1,float16,fp8,0,0.05279466509819031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,128,1,fp8,fp8,0,0.04979733129342397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,0,1,float16,fp8,0,0.052655999859174095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,0,1,fp8,fp8,0,0.05030933519204458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,128,1,float16,float16,0,0.05012799799442291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,0,1,float16,float16,0,0.05049066742261251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,128,1,float16,fp8,0,0.05000533163547516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,128,1,fp8,fp8,0,0.04776533444722494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,0,1,float16,fp8,0,0.0518453319867452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,0,1,fp8,fp8,0,0.04632000128428141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,128,1,float16,float16,0,0.052069331208864846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,0,1,float16,float16,0,0.050255998969078064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,128,1,float16,fp8,0,0.05030933519204458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,128,1,fp8,fp8,0,0.0470719983180364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,0,1,float16,fp8,0,0.05083733300367991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,0,1,fp8,fp8,0,0.04655999938646952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,128,1,float16,float16,0,0.051967998345692955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,0,1,float16,float16,0,0.05184000233809153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,128,1,float16,fp8,0,0.05225066840648651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,128,1,fp8,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,0,1,float16,fp8,0,0.0521066685517629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,0,1,fp8,fp8,0,0.04834666848182678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,128,1,float16,float16,0,0.03369600077470144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,0,1,float16,float16,0,0.03403199960788091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,128,1,float16,fp8,0,0.03457599878311157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,128,1,fp8,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,0,1,float16,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,0,1,fp8,fp8,0,0.033887999753157295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,128,1,float16,float16,0,0.03348266581694285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,0,1,float16,float16,0,0.03373866776625315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,128,1,float16,fp8,0,0.03346666693687439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,128,1,fp8,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,0,1,float16,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,0,1,float16,float16,0,0.0831413318713506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,128,1,float16,float16,0,0.03346666693687439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,0,1,float16,float16,0,0.03342399994532267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,128,1,float16,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,128,1,fp8,fp8,0,0.03271466741959254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,0,1,float16,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,128,1,float16,float16,0,0.03295466552178065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,0,1,float16,float16,0,0.03516799956560135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,128,1,float16,fp8,0,0.03381866713364919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,128,1,fp8,fp8,0,0.07991466422875722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,0,1,float16,fp8,0,0.03347733368476232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,0,1,fp8,fp8,0,0.03265066693226496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,128,1,float16,float16,0,0.02739199995994568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,0,1,float16,float16,0,0.025631998976071674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,128,1,float16,fp8,0,0.025994665920734406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,128,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,0,1,fp8,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,0,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,0,1,fp8,fp8,0,0.025594666600227356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,0,1,fp8,fp8,0,0.03298133363326391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,128,1,float16,fp8,0,0.02603200078010559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,128,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,0,1,float16,fp8,0,0.0262773334980011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,128,1,float16,float16,0,0.026133333643277485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,0,1,float16,float16,0,0.025850666066010792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,128,1,fp8,fp8,0,0.02367999901374181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,0,1,float16,fp8,0,0.02569599946339925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,128,1,float16,float16,0,0.025402667621771496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,128,1,float16,float16,0,0.026746665438016255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,128,1,fp8,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,0,1,float16,float16,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,128,1,float16,fp8,0,0.026789332429567974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,128,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,0,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,0,1,fp8,fp8,0,0.02569599946339925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,128,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,0,1,float16,float16,0,0.018789333601792652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,128,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,128,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,0,1,float16,fp8,0,0.018624000251293182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,0,1,float16,float16,0,0.018816000471512478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,128,1,float16,fp8,0,0.01803733284274737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,0,1,float16,fp8,0,0.017616000026464462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,0,1,float16,fp8,0,0.026906666656335194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,128,1,float16,fp8,0,0.017717332889636356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,0,1,float16,fp8,0,0.01798933371901512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,128,1,float16,fp8,0,0.018042666216691334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,128,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,128,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,128,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,128,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,128,1,float16,float16,0,0.01600533351302147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,0,1,float16,float16,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,0,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,128,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,128,1,float16,float16,0,0.017653333644072216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,128,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,0,1,float16,float16,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,128,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,128,1,fp8,fp8,0,0.016629333297411602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,128,1,float16,float16,0,0.015925332903862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,128,1,float16,float16,0,0.017573333034912746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,128,1,float16,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,128,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,128,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,0,1,float16,float16,0,0.01658133293191592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,0,1,float16,fp8,0,0.016650666793187458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,128,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,128,1,float16,float16,0,0.11152000228563945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,0,1,float16,float16,0,0.11156800389289856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,128,1,float16,fp8,0,0.11042666435241699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,128,1,fp8,fp8,0,0.09861333171526591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,0,1,float16,fp8,0,0.10947733124097188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,0,1,fp8,fp8,0,0.09923733274141948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,128,1,float16,float16,0,0.11121599872907002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,0,1,float16,float16,0,0.11028266946474712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,128,1,float16,fp8,0,0.10971200466156006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,128,1,fp8,fp8,0,0.10102933645248413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,0,1,float16,fp8,0,0.11132799585660298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,0,1,fp8,fp8,0,0.0993280013402303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,128,1,float16,float16,0,0.11163199941317241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,0,1,float16,float16,0,0.11169600486755371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,128,1,float16,fp8,0,0.11125866572062175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,128,1,fp8,fp8,0,0.10191466410954793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,0,1,float16,fp8,0,0.10998400052388509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,0,1,float16,float16,0,0.06458666423956554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,128,1,float16,fp8,0,0.0650133341550827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,128,1,fp8,fp8,0,0.060346667965253196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,0,1,float16,fp8,0,0.06487466891606648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,0,1,fp8,fp8,0,0.060234665870666504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,128,1,float16,float16,0,0.06391466657320659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,0,1,float16,float16,0,0.06417599817117055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,128,1,float16,fp8,0,0.06428800026575725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,128,1,fp8,fp8,0,0.0584799995024999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,0,1,float16,fp8,0,0.06261866788069408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,0,1,fp8,fp8,0,0.058373332023620605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,128,1,float16,float16,0,0.06230400005976359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,0,1,float16,float16,0,0.06444799900054932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,128,1,float16,float16,0,0.06429333488146464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,128,1,fp8,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,0,1,float16,fp8,0,0.06402666866779327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,0,1,fp8,fp8,0,0.0580320010582606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,128,1,float16,float16,0,0.06388266881306966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,0,1,float16,float16,0,0.06331199904282887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,128,1,float16,fp8,0,0.06445866823196411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,128,1,fp8,fp8,0,0.05801066756248474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,0,1,float16,fp8,0,0.06446933249632518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,0,1,fp8,fp8,0,0.05844266712665558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,128,1,float16,float16,0,0.041519999504089355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,0,1,float16,float16,0,0.04207466542720795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,128,1,float16,fp8,0,0.04182399809360504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,128,1,fp8,fp8,0,0.03934400031963984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,0,1,float16,fp8,0,0.04141866664091746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,0,1,fp8,fp8,0,0.10143466790517171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,128,1,float16,float16,0,0.04001600046952566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,0,1,float16,float16,0,0.03991466760635376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,128,1,float16,fp8,0,0.04048533240954081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,128,1,fp8,fp8,0,0.03867733230193456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,0,1,float16,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,0,1,fp8,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,128,1,float16,float16,0,0.041434665520985924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,0,1,float16,float16,0,0.041840001940727234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,128,1,float16,fp8,0,0.040762667854626976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,128,1,fp8,fp8,0,0.037978666524092354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,0,1,float16,fp8,0,0.041493333876132965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,128,1,float16,fp8,0,0.06237333516279856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,0,1,fp8,fp8,0,0.03924266745646795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,128,1,float16,float16,0,0.04011200120051702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,0,1,float16,float16,0,0.04302933315436045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,128,1,float16,fp8,0,0.04171733558177948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,128,1,fp8,fp8,0,0.03839466720819473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,0,1,float16,fp8,0,0.04186666508515676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,0,1,fp8,fp8,0,0.03775999943415324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,128,1,float16,float16,0,0.02769600103298823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,0,1,float16,float16,0,0.028629332780838013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,128,1,float16,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,128,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,0,1,float16,fp8,0,0.029391999046007793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,0,1,fp8,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,0,1,float16,float16,0,0.028175999720891316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,128,1,fp8,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,0,1,float16,fp8,0,0.02741866558790207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,0,1,fp8,fp8,0,0.02773866554101308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,128,1,float16,float16,0,0.02731200059254964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,0,1,float16,float16,0,0.02864533414443334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,128,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,128,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,0,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,0,1,fp8,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,128,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,0,1,float16,float16,0,0.029274667302767437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,128,1,float16,fp8,0,0.02977066735426585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,0,1,float16,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,0,1,fp8,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,128,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,128,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,128,1,fp8,fp8,0,0.020351999749739964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,0,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,0,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,128,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,128,1,fp8,fp8,0,0.020794666061798733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,128,1,float16,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,0,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,128,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,0,1,fp8,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,128,1,float16,float16,0,0.016757333030303318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,128,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,128,1,float16,float16,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,128,1,fp8,fp8,0,0.01639466608564059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,0,1,fp8,fp8,0,0.03921066721280416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,128,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,128,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,0,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,128,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,0,1,float16,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,128,1,float16,fp8,0,0.016629333297411602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,128,1,float16,float16,0,0.01748266691962878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,0,1,float16,float16,0,0.015925332903862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,128,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,128,1,float16,float16,0,0.01595199977358182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,0,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,128,1,float16,float16,0,0.015919999529918034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,128,1,float16,fp8,0,0.017637333522240322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,0,1,float16,float16,0,0.017786666750907898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,128,1,float16,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,128,1,fp8,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,0,1,fp8,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,128,1,float16,float16,0,0.08923199772834778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,128,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,128,1,float16,fp8,0,0.0890880028406779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,128,1,fp8,fp8,0,0.0819893330335617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,0,1,float16,fp8,0,0.08891733487447102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,0,1,fp8,fp8,0,0.08267199993133545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,128,1,float16,float16,0,0.08918933073679607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,0,1,float16,float16,0,0.08956266442934673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,128,1,float16,fp8,0,0.09063466389973958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,128,1,fp8,fp8,0,0.08100800216197968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,0,1,float16,fp8,0,0.09055466453234355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,0,1,float16,float16,0,0.0895146628220876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,128,1,float16,float16,0,0.09098133444786072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,0,1,float16,float16,0,0.09083200494448344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,128,1,float16,fp8,0,0.09059733152389526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,128,1,fp8,fp8,0,0.08062399923801422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,0,1,fp8,fp8,0,0.08204266428947449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,0,1,float16,float16,0,0.05499200026194254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,128,1,float16,fp8,0,0.05445333321889242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,0,1,fp8,fp8,0,0.0810346653064092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,128,1,fp8,fp8,0,0.051962668697039284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,0,1,float16,fp8,0,0.05471999943256378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,0,1,fp8,fp8,0,0.051669334371884666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,128,1,float16,float16,0,0.053861334919929504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,0,1,float16,float16,0,0.052095999320348106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,128,1,float16,fp8,0,0.05412800113360087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,0,1,float16,fp8,0,0.08905067046483357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,0,1,float16,fp8,0,0.05407999952634176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,0,1,fp8,fp8,0,0.0496319979429245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,128,1,float16,float16,0,0.0539680023988088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,0,1,float16,float16,0,0.0543093333641688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,128,1,float16,fp8,0,0.054133335749308266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,128,1,fp8,fp8,0,0.04981866478919983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,0,1,float16,fp8,0,0.05384000142415365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,0,1,fp8,fp8,0,0.05008000135421753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,128,1,float16,float16,0,0.05412800113360087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,0,1,float16,float16,0,0.053823997577031456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,128,1,float16,fp8,0,0.0540533314148585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,128,1,fp8,fp8,0,0.049733335773150124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,0,1,float16,fp8,0,0.05341866612434387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,128,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,128,1,float16,float16,0,0.035749333600203194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,0,1,float16,float16,0,0.035829332967599235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,128,1,float16,fp8,0,0.036042665441830955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,128,1,fp8,fp8,0,0.03531199942032496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,0,1,float16,fp8,0,0.03699733316898346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,0,1,fp8,fp8,0,0.035487999518712364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,128,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,0,1,float16,float16,0,0.035674666364987694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,128,1,float16,fp8,0,0.03532266616821289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,128,1,float16,float16,0,0.05417066812515259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,0,1,float16,fp8,0,0.034917332231998444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,0,1,fp8,fp8,0,0.03282133241494497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,128,1,float16,float16,0,0.03409066547950109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,0,1,float16,float16,0,0.03538133452335993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,128,1,float16,fp8,0,0.0354666660229365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,128,1,fp8,fp8,0,0.03384000062942505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,0,1,float16,fp8,0,0.035786665976047516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,128,1,fp8,fp8,0,0.033359999457995095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,0,1,float16,float16,0,0.03566399961709976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,128,1,float16,fp8,0,0.03588266670703888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,128,1,fp8,fp8,0,0.0354720006386439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,0,1,float16,fp8,0,0.03649600098530451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,0,1,fp8,fp8,0,0.03379733363787333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,128,1,float16,float16,0,0.02314666658639908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,0,1,float16,float16,0,0.024458666642506916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,128,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,128,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,128,1,float16,float16,0,0.03642133375008901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,0,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,0,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,128,1,float16,fp8,0,0.023621333142121632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,128,1,fp8,fp8,0,0.021738665799299877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,0,1,float16,fp8,0,0.023578666150569916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,0,1,fp8,fp8,0,0.04975999891757965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,0,1,fp8,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,0,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,128,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,128,1,float16,float16,0,0.0235359991590182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,0,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,0,1,fp8,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,128,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,0,1,float16,float16,0,0.024138666689395905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,128,1,float16,fp8,0,0.024693332612514496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,128,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,0,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,128,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,0,1,float16,float16,0,0.020432000358899433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,128,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,128,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,128,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,128,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,128,1,float16,fp8,0,0.020069333414236706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,0,1,fp8,fp8,0,0.023573334018389385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,128,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,128,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,0,1,float16,float16,0,0.0189280000825723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,128,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,128,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,0,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,128,1,float16,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,128,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,128,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,128,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,0,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,128,1,float16,fp8,0,0.01618133361140887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,128,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,0,1,float16,fp8,0,0.016208000481128693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,0,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,128,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,128,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,128,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,128,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,0,1,float16,fp8,0,0.021536000072956085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,128,1,float16,float16,0,0.01634666696190834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,128,1,fp8,fp8,0,0.01777600000301997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,128,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,128,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,0,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,128,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,128,1,float16,float16,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,0,1,float16,float16,0,0.014783999572197596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,0,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,128,1,float16,fp8,0,0.015674666812022526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,128,1,float16,float16,0,0.08052266637484233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,0,1,float16,float16,0,0.08028799792130788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,128,1,float16,fp8,0,0.08053866525491078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,128,1,fp8,fp8,0,0.07462400197982788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,0,1,float16,fp8,0,0.08056533336639404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,0,1,fp8,fp8,0,0.07235200206438701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,128,1,float16,float16,0,0.08099733293056488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,0,1,float16,float16,0,0.07867200175921123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,128,1,float16,fp8,0,0.07974400122960408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,128,1,fp8,fp8,0,0.07478933533032735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,0,1,float16,fp8,0,0.08005333443482716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,0,1,fp8,fp8,0,0.0747519979874293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,128,1,float16,float16,0,0.07860266665617625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,0,1,float16,float16,0,0.08031466603279114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,128,1,float16,fp8,0,0.07969066500663757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,128,1,fp8,fp8,0,0.07460799813270569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,0,1,float16,fp8,0,0.08056533336639404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,0,1,fp8,fp8,0,0.07272533575693767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,128,1,float16,float16,0,0.049839998284975685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,0,1,float16,float16,0,0.04776533444722494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,128,1,float16,fp8,0,0.050255998969078064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,128,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,0,1,float16,fp8,0,0.04994666576385498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,0,1,fp8,fp8,0,0.04555733501911163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,128,1,float16,float16,0,0.04795200129350027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,0,1,float16,float16,0,0.047744000951449074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,128,1,float16,fp8,0,0.047775998711586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,128,1,fp8,fp8,0,0.045610666275024414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,0,1,float16,fp8,0,0.04825599988301595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,0,1,fp8,fp8,0,0.04565866788228353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,128,1,float16,float16,0,0.049925332268079124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,128,1,float16,fp8,0,0.04784533381462097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,128,1,fp8,fp8,0,0.04558399816354116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,0,1,float16,fp8,0,0.04789866507053375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,0,1,fp8,fp8,0,0.04444799820582072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,0,1,float16,float16,0,0.047872001926104225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,128,1,float16,fp8,0,0.048512001832326256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,128,1,fp8,fp8,0,0.0459146648645401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,0,1,float16,fp8,0,0.04914666712284088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,0,1,float16,float16,0,0.04816000163555145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,0,1,fp8,fp8,0,0.04590400060017904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,0,1,float16,float16,0,0.029658667743206024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,128,1,float16,fp8,0,0.031983998914559685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,128,1,fp8,fp8,0,0.029520000020662945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,0,1,float16,fp8,0,0.031680000325044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,0,1,fp8,fp8,0,0.029706666866938274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,128,1,float16,float16,0,0.029743999242782593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,0,1,float16,float16,0,0.0296426663796107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,128,1,float16,fp8,0,0.02958400050799052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,128,1,fp8,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,0,1,fp8,fp8,0,0.029818666477998097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,128,1,float16,float16,0,0.03141866624355316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,0,1,float16,float16,0,0.03140799949566523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,128,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,0,1,float16,fp8,0,0.030213333666324615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,0,1,fp8,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,128,1,float16,float16,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,128,1,float16,float16,0,0.031445334355036415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,0,1,float16,float16,0,0.029365333418051403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,128,1,float16,fp8,0,0.02996266633272171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,0,1,float16,fp8,0,0.032032000521818794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,0,1,float16,fp8,0,0.03014400104681651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,0,1,fp8,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,128,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,128,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,128,1,fp8,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,0,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,0,1,fp8,fp8,0,0.021546666820844013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,128,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,128,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,128,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,0,1,float16,float16,0,0.021573332448800404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,128,1,float16,fp8,0,0.02160533269246419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,128,1,float16,float16,0,0.048063998421033226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,0,1,float16,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,0,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,128,1,fp8,fp8,0,0.022469334304332733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,0,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,128,1,float16,float16,0,0.018677332748969395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,128,1,fp8,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,128,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,0,1,float16,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,128,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,128,1,float16,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,128,1,fp8,fp8,0,0.01846933364868164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,0,1,float16,fp8,0,0.019648000597953796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,0,1,fp8,fp8,0,0.017994667092959087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,128,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,128,1,float16,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,128,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,0,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,128,1,float16,float16,0,0.018768000106016796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,128,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,0,1,float16,fp8,0,0.019589333484570186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,128,1,float16,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,0,1,float16,fp8,0,0.01647466669480006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,128,1,float16,float16,0,0.016672000288963318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,0,1,float16,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,128,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,128,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,0,1,float16,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,0,1,float16,float16,0,0.019434666881958645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,128,1,float16,float16,0,0.01600533351302147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,0,1,float16,float16,0,0.016165333489576977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,128,1,float16,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,128,1,float16,float16,0,0.016165333489576977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,0,1,float16,float16,0,0.015562667200962702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,128,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,0,1,float16,float16,0,0.016341333587964375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,128,1,float16,float16,0,0.01674666628241539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,128,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,128,1,float16,float16,0,0.01626666635274887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,128,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,128,1,float16,float16,0,0.015573333948850632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,128,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,128,1,float16,float16,0,0.015557333827018738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,128,1,float16,float16,0,0.06898133456707001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,0,1,float16,float16,0,0.07022933165232341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,128,1,float16,fp8,0,0.06868800024191539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,128,1,fp8,fp8,0,0.06448000172773997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,0,1,float16,fp8,0,0.06841066479682922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,64,0,1,fp8,fp8,0,0.06403733293215434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,128,1,float16,float16,0,0.06963199873765309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,0,1,float16,float16,0,0.06842133402824402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,128,1,fp8,fp8,0,0.06234666705131531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,0,1,float16,fp8,0,0.07021866738796234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,0,1,fp8,fp8,0,0.0620959997177124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,128,1,float16,float16,0,0.07072000205516815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,0,1,float16,float16,0,0.06989866495132446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,128,1,float16,fp8,0,0.07054399947325389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,128,1,fp8,fp8,0,0.06445866823196411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,0,1,float16,fp8,0,0.07070399820804596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,64,0,1,fp8,fp8,0,0.06400000055631001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,0,0.04167466859022776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,128,1,float16,fp8,0,0.042021334171295166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,128,1,fp8,fp8,0,0.039173332353432976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,0,0.043605332573254905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,0,1,fp8,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,128,1,float16,float16,0,0.04155733436346054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,64,128,1,float16,float16,0,0.042026668787002563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,0,1,float16,float16,0,0.0418453315893809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,128,1,float16,fp8,0,0.04375466704368591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,128,1,fp8,fp8,0,0.0395359992980957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,0,1,float16,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,64,0,1,fp8,fp8,0,0.039701332648595176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,128,1,float16,float16,0,0.04187199970086416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,64,128,1,float16,fp8,0,0.06854400038719177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,128,1,fp8,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,0,1,fp8,fp8,0,0.03926933308442434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,128,1,float16,float16,0,0.0414986660083135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,0,1,float16,float16,0,0.041663999358812966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,128,1,float16,fp8,0,0.04359999795754751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,128,1,fp8,fp8,0,0.03926933308442434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,128,1,float16,fp8,0,0.04393066465854645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,0,1,fp8,fp8,0,0.03955733279387156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,128,1,float16,float16,0,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,0,0.028186666468779247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,128,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,128,1,fp8,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,64,0,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,128,1,float16,float16,0,0.029391999046007793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,0,1,float16,float16,0,0.027589333554108936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,128,1,float16,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,128,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,0,1,float16,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,64,0,1,fp8,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,128,1,float16,float16,0,0.029338667790095013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,0,1,float16,float16,0,0.02934933453798294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,64,0,1,float16,fp8,0,0.043562665581703186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,128,1,fp8,fp8,0,0.0276053324341774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,0,1,float16,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,0,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,128,1,float16,float16,0,0.029333333174387615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,0,1,float16,float16,0,0.02829866607983907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,128,1,float16,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,128,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,0,1,float16,fp8,0,0.029306667546431225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,64,0,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,128,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,128,1,fp8,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,0,0.022181332111358643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,64,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,64,128,1,float16,fp8,0,0.029114666084448498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,128,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,128,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,0,1,float16,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,128,1,float16,float16,0,0.021514666577180225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,128,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,0,1,float16,fp8,0,0.02183466653029124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,64,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,128,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,128,1,fp8,fp8,0,0.020794666061798733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,64,0,1,fp8,fp8,0,0.020010666300853092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,128,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,128,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,128,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,0,0.01964266722400983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,64,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,128,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,128,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,0,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,128,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,64,0,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,128,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,64,0,1,float16,fp8,0,0.043935999274253845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,128,1,float16,fp8,0,0.016021333634853363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,128,1,fp8,fp8,0,0.015722667177518208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,64,128,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,64,0,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,128,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,0,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,64,128,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,128,1,float16,float16,0,0.015450666348139444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,128,1,float16,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,0,1,float16,fp8,0,0.014783999572197596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,128,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,64,0,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,128,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,64,128,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,64,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,0,1,fp8,fp8,0,0.016095999628305435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,0,1,fp8,fp8,0,0.016293333222468693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,128,1,float16,float16,0,0.015477333217859268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,128,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,0,1,float16,fp8,0,0.015834666788578033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,128,1,float16,fp8,0,0.01623999948302905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,64,0,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,128,1,float16,fp8,0,0.015989333391189575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,128,1,float16,float16,0,0.015477333217859268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,0,0.01597333326935768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,0,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,128,1,float16,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,64,128,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,0,1,fp8,fp8,0,0.0161920003592968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,128,1,float16,float16,0,0.01597333326935768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,128,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,float16,0,0.4482506513595581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,64,0,1,float16,fp8,0,0.015840000162522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,fp8,0,0.45236265659332275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,128,1,fp8,fp8,0,0.4161226749420166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,float16,0,2.8375040690104165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,float16,0,0.47189335028330487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,fp8,0,2.841610590616862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,fp8,0,0.4658133188883464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,128,1,fp8,fp8,0,0.43622398376464844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,float16,0,2.855253219604492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,float16,0,0.478549321492513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,fp8,0,2.858016014099121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,0,1,fp8,fp8,0,2.589930693308512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,0,1,fp8,fp8,0,2.6057119369506836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,fp8,0,0.4809279839197795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,128,1,fp8,fp8,0,0.45287466049194336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,float16,0,2.8706719080607095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,float16,0,0.27109867334365845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,fp8,0,2.8813759485880532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,0,1,fp8,fp8,0,2.6238773663838706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,fp8,0,0.285589337348938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,128,1,fp8,fp8,0,0.2609600027402242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,fp8,0,1.5279520352681477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,0,1,fp8,fp8,0,1.3955893516540527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,float16,0,0.24011733134587607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,fp8,0,0.24435732762018839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,128,1,fp8,fp8,0,0.22710933287938437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,float16,0,1.5183520317077637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,float16,0,1.4875680605570476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,float16,0,0.24680533011754355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,fp8,0,1.4890774091084797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,0,1,fp8,fp8,0,1.3637973467508953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,128,1,fp8,fp8,0,0.23458133141199747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,fp8,0,1.4951945940653484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,0,1,fp8,fp8,0,1.3700853983561199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,float16,0,0.2558719913164775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,fp8,0,0.24889600276947021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,float16,0,1.4949599901835124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,128,1,fp8,fp8,0,0.24261333545049033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,fp8,0,1.507578690846761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,float16,0,0.154858668645223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,fp8,0,0.2591200073560079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,fp8,0,0.1570133368174235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,float16,0,0.8341440359751383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,128,1,fp8,fp8,0,0.1509226659933726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,fp8,0,0.8397226333618164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,0,1,fp8,fp8,0,0.7699093023935953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,float16,0,0.14013866583506265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,fp8,0,0.1402346690495809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,float16,0,1.5029385884602864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,128,1,fp8,fp8,0,0.13004799683888754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,fp8,0,0.8185973167419434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,0,1,fp8,fp8,0,0.7478880087534586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,0,1,fp8,fp8,0,1.3768000602722168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,float16,0,0.1384266714255015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,fp8,0,0.14129599928855896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,128,1,fp8,fp8,0,0.13404800494511923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,float16,0,0.8166773319244385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,fp8,0,0.8199573357899984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,0,1,fp8,fp8,0,0.7529973189036051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,fp8,0,0.1465013325214386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,float16,0,0.8245173295338949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,128,1,fp8,fp8,0,0.14205333590507507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,float16,0,0.8196693261464437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,fp8,0,0.8265813191731771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,0,1,fp8,fp8,0,0.7589226563771566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,float16,0,0.11372799674669902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,float16,0,0.1458080013593038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,fp8,0,0.11188800136248271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,128,1,fp8,fp8,0,0.11001599828402202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,fp8,0,0.506767988204956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,0,1,fp8,fp8,0,0.46487998962402344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,fp8,0,0.11342933773994446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,float16,0,0.5045440196990967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,128,1,fp8,fp8,0,0.10738133390744527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,fp8,0,0.5047253370285034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,0,1,fp8,fp8,0,0.46386667092641193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,float16,0,0.11358400185902913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,fp8,0,0.11365333199501038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,float16,0,0.11370666821797688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,128,1,fp8,fp8,0,0.10754133264223735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,fp8,0,0.5052586793899536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,0,1,fp8,fp8,0,0.465989351272583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,float16,0,0.11333866914113362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,fp8,0,0.11342933773994446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,float16,0,0.5067360003789266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,128,1,fp8,fp8,0,0.10743467013041179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,float16,0,0.5044426520665487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,fp8,0,0.5058079957962036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,0,1,fp8,fp8,0,0.4652959903081258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,float16,0,0.34092267354329425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,fp8,0,0.3439040184020996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,128,1,fp8,fp8,0,0.3161440094312032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,float16,0,1.6884320576985676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,float16,0,0.36614398161570233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,fp8,0,1.6922987302144368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,fp8,0,0.37118931611378986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,128,1,fp8,fp8,0,0.3269866704940796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,float16,0,0.5055413246154785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,0,1,fp8,fp8,0,1.5447093645731609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,fp8,0,1.7043946584065754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,0,1,fp8,fp8,0,1.5563519795735676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,float16,0,1.7005707422892253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,float16,0,1.713205337524414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,float16,0,0.361407995223999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,fp8,0,1.7189812660217285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,fp8,0,0.365231990814209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,128,1,fp8,fp8,0,0.34063466389973956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,0,1,fp8,fp8,0,1.5692747433980305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,float16,0,0.9259680112202963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,fp8,0,0.21351999044418335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,128,1,fp8,fp8,0,0.20155733823776245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,float16,0,0.1829813321431478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,0,1,fp8,fp8,0,0.8518186410268148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,float16,0,0.21000534296035767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,fp8,0,0.18739734093348184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,float16,0,0.8972640037536621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,128,1,fp8,fp8,0,0.17534933487574259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,fp8,0,0.8984693686167399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,fp8,0,0.9309546947479248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,float16,0,0.19961067040761313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,fp8,0,0.19146132469177246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,float16,0,0.9040853182474772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,128,1,fp8,fp8,0,0.18076266845067343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,fp8,0,0.9061813354492188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,0,1,fp8,fp8,0,0.8300320307413737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,float16,0,0.19768534104029337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,fp8,0,0.19954667488733926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,float16,0,0.9115359783172607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,128,1,fp8,fp8,0,0.18774400154749551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,float16,0,0.12104533116022746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,0,1,fp8,fp8,0,0.8361653486887614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,fp8,0,0.12638933459917703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,0,1,fp8,fp8,0,0.8261333306630453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,128,1,fp8,fp8,0,0.11957866946856181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,fp8,0,0.5209386746088663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,0,1,fp8,fp8,0,0.4809120098749797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,float16,0,0.10934933026631673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,float16,0,0.5074079831441244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,fp8,0,0.9152586460113525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,128,1,fp8,fp8,0,0.1023466686407725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,float16,0,0.5191146532694498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,0,1,fp8,fp8,0,0.4639413356781006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,float16,0,0.11147200067838033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,fp8,0,0.1113866666952769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,128,1,fp8,fp8,0,0.10385599732398987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,fp8,0,0.11147733529408772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,fp8,0,0.5086613496144613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,float16,0,0.1135093371073405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,fp8,0,0.5088800191879272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,fp8,0,0.11560533444086711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,float16,0,0.5071146488189697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,fp8,0,0.5110773245493571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,0,1,fp8,fp8,0,0.47189335028330487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,0,1,fp8,fp8,0,0.4676906665166219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,float16,0,0.08896533648173015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,float16,0,0.32870399951934814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,float16,0,0.5109599828720093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,128,1,fp8,fp8,0,0.08492799599965413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,128,1,fp8,fp8,0,0.1109920044740041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,fp8,0,0.32663466533025104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,float16,0,0.0869599978129069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,float16,0,0.3266400098800659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,fp8,0,0.08745066324869792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,128,1,fp8,fp8,0,0.0827466646830241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,fp8,0,0.32654933134714764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,0,1,fp8,fp8,0,0.3016693393389384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,float16,0,0.08874133229255676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,fp8,0,0.08770666519800822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,128,1,fp8,fp8,0,0.0844586690266927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,0,1,fp8,fp8,0,0.3020159999529521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,0,1,fp8,fp8,0,0.3017599980036418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,float16,0,0.08872000376383464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,float16,0,0.3280906677246094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,float16,0,0.32654400666554767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,fp8,0,0.08820266524950664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,128,1,fp8,fp8,0,0.0827893316745758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,fp8,0,0.3272213339805603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,fp8,0,0.3264159957567851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,float16,0,0.28260799249013263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,fp8,0,0.285589337348938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,float16,0,1.2248106797536213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,128,1,fp8,fp8,0,0.2653706669807434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,fp8,0,0.0888426701227824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,float16,0,0.2900586724281311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,fp8,0,1.2305813630421956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,0,1,fp8,fp8,0,1.1223093668619792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,fp8,0,0.30852266152699787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,128,1,fp8,fp8,0,0.27275200684865314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,float16,0,1.235200007756551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,float16,0,0.2988746762275696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,fp8,0,1.2354293664296467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,0,1,fp8,fp8,0,1.1289013226826985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,fp8,0,0.304202675819397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,128,1,fp8,fp8,0,0.28942400217056274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,0,1,fp8,fp8,0,0.3020319938659668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,float16,0,0.17538666725158691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,fp8,0,1.2488746643066406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,fp8,0,0.17940799395243326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,128,1,fp8,fp8,0,0.18060266971588135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,fp8,0,0.6842719713846842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,float16,0,1.2445813020070393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,float16,0,0.15285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,0,1,fp8,fp8,0,0.6295040051142374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,0,1,fp8,fp8,0,1.1407306989034016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,fp8,0,0.1546933352947235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,float16,0,0.6813493569691976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,128,1,fp8,fp8,0,0.14847999811172485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,float16,0,0.15847999850908914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,fp8,0,0.6589226722717285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,fp8,0,0.15838399529457092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,float16,0,0.6599040031433105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,128,1,fp8,fp8,0,0.1607360045115153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,fp8,0,0.6640373468399048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,float16,0,0.6566506624221802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,0,1,fp8,fp8,0,0.6209333340326945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,float16,0,0.16475733121236166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,float16,0,0.6705546379089355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,128,1,fp8,fp8,0,0.15878400206565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,fp8,0,0.6697226365407308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,0,1,fp8,fp8,0,0.617418646812439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,float16,0,0.10240532954533894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,float16,0,0.3898293177286784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,fp8,0,0.10549333691596985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,128,1,fp8,fp8,0,0.10381866494814555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,fp8,0,0.3921653429667155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,0,1,fp8,fp8,0,0.3621600071589152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,float16,0,0.09499200185139973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,0,1,fp8,fp8,0,0.6069440046946207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,float16,0,0.3798133134841919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,fp8,0,0.16746666034062704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,fp8,0,0.38017066319783527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,0,1,fp8,fp8,0,0.34693864981333417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,float16,0,0.09531199932098389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,fp8,0,0.09563199679056804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,128,1,fp8,fp8,0,0.08924800157546997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,fp8,0,0.381381352742513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,fp8,0,0.09507200121879578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,128,1,fp8,fp8,0,0.0870293378829956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,float16,0,0.09688533345858256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,fp8,0,0.09824533263842265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,128,1,fp8,fp8,0,0.09445333480834961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,float16,0,0.380293329556783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,fp8,0,0.3845173517862956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,0,1,fp8,fp8,0,0.353274663289388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,float16,0,0.0790293316046397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,float16,0,0.2558719913164775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,fp8,0,0.0788800021012624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,128,1,fp8,fp8,0,0.0745600014925003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,fp8,0,0.25490667422612506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,0,1,fp8,fp8,0,0.2364799976348877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,float16,0,0.380079984664917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,float16,0,0.25473066171010333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,128,1,fp8,fp8,0,0.07449600100517273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,fp8,0,0.25494933128356934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,0,1,fp8,fp8,0,0.23595199982325235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,0,1,fp8,fp8,0,0.3482826550801595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,float16,0,0.2560639977455139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,fp8,0,0.07941333452860515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,128,1,fp8,fp8,0,0.07479999959468842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,fp8,0,0.25515733162562054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,0,1,fp8,fp8,0,0.23666133483250937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,float16,0,0.07856533428033192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,float16,0,0.25471999247868854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,fp8,0,0.07852800190448761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,128,1,fp8,fp8,0,0.07446399827798207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,fp8,0,0.2549813389778137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,0,1,fp8,fp8,0,0.2344906727472941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,float16,0,0.078575998544693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,float16,0,0.43905067443847656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,float16,0,0.07898133496443431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,fp8,0,0.07865066826343536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,128,1,fp8,fp8,0,0.406879981358846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,float16,0,1.5967733065287273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,float16,0,0.4654719829559326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,fp8,0,1.6029653549194336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,0,1,fp8,fp8,0,1.4573173522949219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,fp8,0,0.45560534795125324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,128,1,fp8,fp8,0,0.42370132605234784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,fp8,0,0.4422293504079183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,float16,0,1.6135306358337402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,float16,0,0.4683519999186198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,fp8,0,1.6167999903361003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,fp8,0,0.4717866579691569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,float16,0,1.63100798924764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,128,1,fp8,fp8,0,0.44010667006174725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,float16,0,0.26047466198603314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,fp8,0,1.6360479990641277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,0,1,fp8,fp8,0,1.493941307067871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,float16,0,0.8695092995961508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,fp8,0,0.2651093403498332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,128,1,fp8,fp8,0,0.2646399935086568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,float16,0,0.22989332675933838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,fp8,0,0.8743946552276611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,0,1,fp8,fp8,0,0.800976037979126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,fp8,0,0.23153066635131836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,float16,0,0.8347466786702474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,fp8,0,0.8365546862284342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,0,1,fp8,fp8,0,0.7667626539866129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,float16,0,0.23592533667882284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,fp8,0,0.2386186718940735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,float16,0,0.8411253293355306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,128,1,fp8,fp8,0,0.22366400559743246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,128,1,fp8,fp8,0,0.21702400843302408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,fp8,0,0.843989372253418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,float16,0,0.24426132440567017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,0,1,fp8,fp8,0,0.7738933563232422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,fp8,0,0.24837332963943481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,128,1,fp8,fp8,0,0.23174933592478433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,0,1,fp8,fp8,0,1.4723199208577473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,float16,0,0.1418826679388682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,0,1,fp8,fp8,0,0.7814506689707438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,fp8,0,0.14521599809328714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,float16,0,0.8515146573384603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,128,1,fp8,fp8,0,0.13885333140691122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,fp8,0,0.8552693525950114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,fp8,0,0.4778453509012858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,float16,0,0.47441601753234863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,0,1,fp8,fp8,0,0.4392586549123128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,float16,0,0.12160533666610718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,fp8,0,0.12392000357309978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,float16,0,0.45348799228668213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,fp8,0,0.45501331488291424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,0,1,fp8,fp8,0,0.4188106854756673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,float16,0,0.12574932972590128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,float16,0,0.4575146834055583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,128,1,fp8,fp8,0,0.12261333068211873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,fp8,0,0.4594613313674927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,128,1,fp8,fp8,0,0.11581866939862569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,float16,0,0.13100266456604004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,0,1,fp8,fp8,0,0.42443732420603436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,fp8,0,0.1334826648235321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,128,1,fp8,fp8,0,0.12799466649691263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,fp8,0,0.46643733978271484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,0,1,fp8,fp8,0,0.4294293324152629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,float16,0,0.08298133313655853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,float16,0,0.27459200223286945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,128,1,fp8,fp8,0,0.08322666585445404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,fp8,0,0.2777493397394816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,float16,0,0.4638880093892415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,float16,0,0.07896000146865845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,fp8,0,0.12787200013796488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,float16,0,0.2700960040092468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,fp8,0,0.07876266539096832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,128,1,fp8,fp8,0,0.07443200051784515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,fp8,0,0.27132266759872437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,0,1,fp8,fp8,0,0.24676799774169922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,float16,0,0.07889066636562347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,float16,0,0.26956266164779663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,128,1,fp8,fp8,0,0.07460799813270569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,fp8,0,0.2711946765581767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,0,1,fp8,fp8,0,0.24849067131678262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,fp8,0,0.0862559974193573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,float16,0,0.2714986602465312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,fp8,0,0.08085866769154866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,128,1,fp8,fp8,0,0.07673066854476929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,fp8,0,0.27342400948206586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,0,1,fp8,fp8,0,0.25226666529973346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,float16,0,0.06405866642793019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,float16,0,0.18955733378728232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,fp8,0,0.06241600215435028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,fp8,0,0.07926400005817413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,128,1,fp8,fp8,0,0.060271998246510826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,0,1,fp8,fp8,0,0.2584853370984395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,0,1,fp8,fp8,0,0.17500799894332886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,float16,0,0.18889067570368448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,fp8,0,0.06423999865849812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,float16,0,0.07865599791208903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,128,1,fp8,fp8,0,0.06187200049559275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,fp8,0,0.18980266650517783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,0,1,fp8,fp8,0,0.17511467138926187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,float16,0,0.062074666221936546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,float16,0,0.18923733631769815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,fp8,0,0.06420266628265381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,fp8,0,0.18930133183797201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,fp8,0,0.1896053353945414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,0,1,fp8,fp8,0,0.17511467138926187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,float16,0,0.062309334675470986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,float16,0,0.18959466616312662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,fp8,0,0.062319998939832054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,128,1,fp8,fp8,0,0.060346667965253196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,fp8,0,0.18936532735824585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,0,1,fp8,fp8,0,0.17516799767812094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,float16,0,0.33446399370829266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,fp8,0,0.3369599978129069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,float16,0,0.06410133341948192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,128,1,fp8,fp8,0,0.3094453414281209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,fp8,0,0.9852000077565511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,0,1,fp8,fp8,0,0.8926719824473063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,float16,0,0.3433813254038493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,fp8,0,0.3472586472829183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,float16,0,0.979850689570109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,128,1,fp8,fp8,0,0.32067734003067017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,fp8,0,0.9949973424275717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,0,1,fp8,fp8,0,0.9053440093994141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,float16,0,0.3559946616490682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,fp8,0,0.35922666390736896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,float16,0,1.0030559698740642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,128,1,fp8,fp8,0,0.334666649500529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,fp8,0,1.0067466894785564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,0,1,fp8,fp8,0,0.9191466967264811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,float16,0,0.20145599047342935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,fp8,0,0.20544532934824625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,128,1,fp8,fp8,0,0.19352533419926962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,float16,0,0.9895040194193522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,fp8,0,0.5499146779378256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,0,1,fp8,fp8,0,0.5026826858520508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,float16,0,0.17533334096272787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,float16,0,0.5162826776504517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,128,1,fp8,fp8,0,0.16847999890645346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,float16,0,0.5460053284962972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,fp8,0,0.518666664759318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,128,1,fp8,fp8,0,0.06201066573460897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,float16,0,0.18017599980036417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,float16,0,0.5221120119094849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,fp8,0,0.17733333508173624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,128,1,fp8,fp8,0,0.17193067073822021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,fp8,0,0.523904005686442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,0,1,fp8,fp8,0,0.4779733419418335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,float16,0,0.18887466192245483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,float16,0,0.5313706795374552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,fp8,0,0.18143999576568604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,fp8,0,0.19022399187088013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,128,1,fp8,fp8,0,0.17912532885869345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,fp8,0,0.533461332321167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,float16,0,0.11185066898663838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,0,1,fp8,fp8,0,0.4822080135345459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,float16,0,0.3039146661758423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,128,1,fp8,fp8,0,0.10943999886512756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,fp8,0,0.30530667304992676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,0,1,fp8,fp8,0,0.2829333345095317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,float16,0,0.09918399651845296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,float16,0,0.28887999057769775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,128,1,fp8,fp8,0,0.09290132919947307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,fp8,0,0.28991466760635376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,0,1,fp8,fp8,0,0.2632799943288167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,float16,0,0.09922666351000468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,float16,0,0.29021332661310834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,fp8,0,0.10070932904879253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,128,1,fp8,fp8,0,0.09339200456937154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,fp8,0,0.2917226751645406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,fp8,0,0.11600533127784729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,float16,0,0.10409599542617798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,float16,0,0.294378658135732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,fp8,0,0.1014400025208791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,fp8,0,0.10587732990582784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,128,1,fp8,fp8,0,0.1013759970664978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,fp8,0,0.2962133288383484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,0,1,fp8,fp8,0,0.27342400948206586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,float16,0,0.06622933348019917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,float16,0,0.18112534284591675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,fp8,0,0.06866666674613953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,128,1,fp8,fp8,0,0.06412266691525777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,fp8,0,0.18388267358144125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,0,1,fp8,fp8,0,0.1702666680018107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,float16,0,0.06190933287143707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,float16,0,0.17778666814168295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,fp8,0,0.06228800117969513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,128,1,fp8,fp8,0,0.058090666929880776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,fp8,0,0.17840532461802164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,0,1,fp8,fp8,0,0.16367999712626138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,float16,0,0.06253333389759064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,float16,0,0.1771413286526998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,fp8,0,0.06425066788991292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,128,1,fp8,fp8,0,0.0580213318268458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,fp8,0,0.17903999487559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,0,1,fp8,fp8,0,0.1637333333492279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,float16,0,0.06256533165772755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,float16,0,0.17892267306645712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,128,1,fp8,fp8,0,0.06039466460545858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,0,1,fp8,fp8,0,0.49024001757303876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,fp8,0,0.180842657883962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,0,1,fp8,fp8,0,0.1669173240661621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,float16,0,0.1276693344116211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,fp8,0,0.05384000142415365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,128,1,fp8,fp8,0,0.05162133276462555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,fp8,0,0.1276479959487915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,0,1,fp8,fp8,0,0.11962667107582092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,float16,0,0.05418666700522105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,float16,0,0.12797333796819052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,fp8,0,0.054058666030565895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,128,1,fp8,fp8,0,0.05160533388455709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,fp8,0,0.12772267063458762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,0,1,fp8,fp8,0,0.11760532855987549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,float16,0,0.053904001911481224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,float16,0,0.12772799531618753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,fp8,0,0.054560000697771706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,128,1,fp8,fp8,0,0.05162666738033295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,fp8,0,0.12753599882125854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,0,1,fp8,fp8,0,0.11916266878445943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,fp8,0,0.06490666667620341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,float16,0,0.12814399600028992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,fp8,0,0.05436266462008158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,float16,0,0.05399466554323832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,fp8,0,0.12617599964141846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,0,1,fp8,fp8,0,0.11776000261306763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,float16,0,0.43856000900268555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,float16,0,0.9776906967163086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,128,1,fp8,fp8,0,0.052202666799227394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,0,1,fp8,fp8,0,0.2666026751200358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,fp8,0,0.441920002301534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,128,1,fp8,fp8,0,0.40457598368326825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,float16,0,0.45174400011698407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,0,1,fp8,fp8,0,0.8899359703063965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,fp8,0,0.4552266597747803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,float16,0,0.9898186524709066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,128,1,fp8,fp8,0,0.4190613428751628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,fp8,0,0.9941066900889078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,0,1,fp8,fp8,0,0.9051787058512369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,fp8,0,0.9779146512349447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,float16,0,0.46881067752838135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,fp8,0,0.47091734409332275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,128,1,fp8,fp8,0,0.43877867857615155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,fp8,0,1.0121066570281982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,float16,0,0.05435200035572052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,0,1,fp8,fp8,0,0.9232319990793864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,float16,0,0.2569706638654073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,float16,0,1.007258653640747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,fp8,0,0.2616106669108073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,128,1,fp8,fp8,0,0.24555732806523642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,fp8,0,0.5432426532109579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,0,1,fp8,fp8,0,0.5011093219121298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,float16,0,0.2241119941075643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,fp8,0,0.22661866744359335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,128,1,fp8,fp8,0,0.21197867393493652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,float16,0,0.5378239949544271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,fp8,0,0.5099626779556274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,float16,0,0.2327786684036255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,float16,0,0.5132373174031576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,128,1,fp8,fp8,0,0.21865065892537436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,float16,0,0.5084640185038248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,fp8,0,0.5161493221918741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,0,1,fp8,fp8,0,0.472213347752889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,float16,0,0.23965332905451456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,float16,0,0.5219360192616781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,128,1,fp8,fp8,0,0.22821333010991415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,fp8,0,0.526095986366272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,0,1,fp8,fp8,0,0.4814293384552002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,float16,0,0.13607999682426453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,fp8,0,0.24301334222157797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,float16,0,0.2919573386510213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,fp8,0,0.14017599821090698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,128,1,fp8,fp8,0,0.13295466701189676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,fp8,0,0.2960960070292155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,0,1,fp8,fp8,0,0.2739040056864421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,float16,0,0.11585600177447002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,float16,0,0.2720693349838257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,fp8,0,0.11773866415023804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,128,1,fp8,fp8,0,0.11151466766993205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,fp8,0,0.27321066459019977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,0,1,fp8,fp8,0,0.2513440052668254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,float16,0,0.11993066469828288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,float16,0,0.2746880054473877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,fp8,0,0.23381867011388144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,128,1,fp8,fp8,0,0.11571199695269267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,fp8,0,0.27738134066263836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,0,1,fp8,fp8,0,0.2565280000368754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,float16,0,0.12589866916338602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,float16,0,0.28142400582631427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,fp8,0,0.12967466314633688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,128,1,fp8,fp8,0,0.12366933623949687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,fp8,0,0.284496009349823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,0,1,fp8,fp8,0,0.264357328414917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,float16,0,0.07715733349323273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,float16,0,0.16689600547154745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,fp8,0,0.07904000083605449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,128,1,fp8,fp8,0,0.07751466830571492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,fp8,0,0.16932799418767294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,0,1,fp8,fp8,0,0.15891733765602112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,float16,0,0.07039999961853027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,float16,0,0.16057599584261575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,128,1,fp8,fp8,0,0.06622399886449178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,fp8,0,0.16146666804949442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,0,1,fp8,fp8,0,0.1465173363685608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,float16,0,0.07073600093523662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,float16,0,0.1606826682885488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,fp8,0,0.07257600128650665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,128,1,fp8,fp8,0,0.06814933319886525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,fp8,0,0.16286399960517883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,0,1,fp8,fp8,0,0.14864533146222433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,float16,0,0.07303999861081441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,fp8,0,0.12160533666610718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,fp8,0,0.07468800246715546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,128,1,fp8,fp8,0,0.07091199855009715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,fp8,0,0.16454933087031046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,0,1,fp8,fp8,0,0.15029333035151163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,float16,0,0.04778666794300079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,float16,0,0.10786133011182149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,fp8,0,0.04826133449872335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,128,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,fp8,0,0.11014399925867717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,0,1,fp8,fp8,0,0.10106666882832845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,fp8,0,0.07226133346557617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,float16,0,0.04588800172011057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,float16,0,0.10762133200963338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,fp8,0,0.046207999189694725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,128,1,fp8,fp8,0,0.04427733520666758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,fp8,0,0.10737599929173787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,0,1,fp8,fp8,0,0.4675306479136149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,float16,0,0.04642133414745331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,float16,0,0.10731200377146403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,128,1,fp8,fp8,0,0.04394666850566864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,fp8,0,0.10877866546312968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,0,1,fp8,fp8,0,0.09860799709955852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,float16,0,0.045935998360315956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,float16,0,0.10745066404342651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,fp8,0,0.04764799773693085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,128,1,fp8,fp8,0,0.045978665351867676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,float16,0,0.16246933738390604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,fp8,0,0.10735467076301575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,0,1,fp8,fp8,0,0.09948800007502238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,float16,0,0.037690666814645134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,float16,0,0.07871999839941661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,fp8,0,0.03777066618204117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,128,1,fp8,fp8,0,0.0378506655494372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,fp8,0,0.07875200112660725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,0,1,fp8,fp8,0,0.07252799967924754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,0,1,fp8,fp8,0,0.09923199812571208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,float16,0,0.07867733140786488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,128,1,fp8,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,fp8,0,0.07839466631412506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,0,1,fp8,fp8,0,0.07459199925263722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,float16,0,0.03856533269087473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,float16,0,0.07871466875076294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,float16,0,0.037392000357309975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,fp8,0,0.037834666669368744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,128,1,fp8,fp8,0,0.03754133234421412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,fp8,0,0.07880533238252004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,0,1,fp8,fp8,0,0.07449066638946533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,float16,0,0.03957333415746689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,float16,0,0.07877866427103679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,128,1,fp8,fp8,0,0.037471999724706016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,fp8,0,0.07901866734027863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,0,1,fp8,fp8,0,0.07344000041484833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,float16,0,0.33879466851552326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,fp8,0,0.3404746850331624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,128,1,fp8,fp8,0,0.3125279943148295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,fp8,0,0.04753066599369049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,0,1,fp8,fp8,0,0.5741493304570516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,float16,0,0.3500426610310872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,float16,0,0.6406453450520834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,fp8,0,0.35177600383758545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,128,1,fp8,fp8,0,0.3249280055363973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,float16,0,0.6283040046691895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,0,1,fp8,fp8,0,0.5872106552124023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,float16,0,0.3643306493759155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,fp8,0,0.6297119855880737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,fp8,0,0.36445868015289307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,fp8,0,0.6546239852905273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,fp8,0,0.641648014386495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,0,1,fp8,fp8,0,0.6010026534398397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,float16,0,0.2014453411102295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,float16,0,0.35608001550038654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,fp8,0,0.2039466698964437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,128,1,fp8,fp8,0,0.1928960084915161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,128,1,fp8,fp8,0,0.33659199873606366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,fp8,0,0.3582773208618164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,0,1,fp8,fp8,0,0.3327946662902832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,float16,0,0.3269439935684204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,128,1,fp8,fp8,0,0.1653600037097931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,fp8,0,0.32681600252787274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,0,1,fp8,fp8,0,0.3039199908574422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,float16,0,0.654698650042216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,float16,0,0.1790506641070048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,float16,0,0.17176000277201334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,fp8,0,0.17428799470265707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,fp8,0,0.33349867661794025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,0,1,fp8,fp8,0,0.31014400720596313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,float16,0,0.18865066766738892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,float16,0,0.3410293261210124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,float16,0,0.33162667353947956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,fp8,0,0.1896053353945414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,128,1,fp8,fp8,0,0.1699840029080709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,fp8,0,0.3430560032526652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,0,1,fp8,fp8,0,0.31728533903757733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,float16,0,0.10857599973678589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,float16,0,0.19451733430226645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,fp8,0,0.11098133524258931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,128,1,fp8,fp8,0,0.10725333293279012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,fp8,0,0.19644266366958618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,0,1,fp8,fp8,0,0.18487467368443808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,float16,0,0.09332266449928284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,float16,0,0.1786880095799764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,fp8,0,0.1802826722462972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,128,1,fp8,fp8,0,0.0865119993686676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,128,1,fp8,fp8,0,0.17841066916783652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,0,1,fp8,fp8,0,0.1649066706498464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,float16,0,0.09511466821034749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,float16,0,0.18080532550811768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,fp8,0,0.09701333443323772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,128,1,fp8,fp8,0,0.08894933263460796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,fp8,0,0.1832533280054728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,0,1,fp8,fp8,0,0.16693333784739176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,float16,0,0.09922666351000468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,float16,0,0.1835093299547831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,fp8,0,0.10115200281143188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,128,1,fp8,fp8,0,0.09733866651852925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,fp8,0,0.1860213279724121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,0,1,fp8,fp8,0,0.17507733901341757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,float16,0,0.06029866635799408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,float16,0,0.11335999766985576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,128,1,fp8,fp8,0,0.06028800209363302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,fp8,0,0.11433066924413045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,0,1,fp8,fp8,0,0.10687466462453206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,float16,0,0.057114665706952415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,float16,0,0.10991467038790385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,fp8,0,0.09537600477536519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,fp8,0,0.05657599866390228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,128,1,fp8,fp8,0,0.05435733497142792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,fp8,0,0.10968533158302307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,0,1,fp8,fp8,0,0.1015786627928416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,float16,0,0.056346664826075234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,float16,0,0.10948800047238667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,fp8,0,0.05831466615200043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,128,1,fp8,fp8,0,0.054234668612480164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,fp8,0,0.11030933260917664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,0,1,fp8,fp8,0,0.1011786659558614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,fp8,0,0.17934934298197427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,fp8,0,0.0634080022573471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,fp8,0,0.06032533446947733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,128,1,fp8,fp8,0,0.05622933308283488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,fp8,0,0.11230400204658508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,0,1,fp8,fp8,0,0.10329066713651021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,float16,0,0.041749333341916404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,float16,0,0.07502399881680806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,float16,0,0.0582826683918635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,float16,0,0.11051733295122783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,fp8,0,0.07652799785137177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,0,1,fp8,fp8,0,0.07307200133800507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,float16,0,0.07509866853555043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,fp8,0,0.041738669077555336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,128,1,fp8,fp8,0,0.040181333820025124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,fp8,0,0.07472000022729237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,0,1,fp8,fp8,0,0.06880000233650208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,float16,0,0.07500266532103221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,fp8,0,0.0423573354880015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,fp8,0,0.043712000052134194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,128,1,fp8,fp8,0,0.039642666776975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,128,1,fp8,fp8,0,0.041984001795450844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,0,1,fp8,fp8,0,0.06853333115577698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,float16,0,0.042064001162846885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,float16,0,0.0747519979874293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,fp8,0,0.04364799956480662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,128,1,fp8,fp8,0,0.03994133323431015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,fp8,0,0.07471466561158498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,0,1,fp8,fp8,0,0.07018133501211803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,float16,0,0.033557333052158356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,float16,0,0.062368000547091164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,fp8,0,0.03329066683848699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,128,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,fp8,0,0.06169066826502482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,0,1,fp8,fp8,0,0.05860800047715505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,float16,0,0.03355200091997782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,float16,0,0.06218666831652323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,128,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,fp8,0,0.06230400005976359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,0,1,fp8,fp8,0,0.058186665177345276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,float16,0,0.033215999603271484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,float16,0,0.06227200229962667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,fp8,0,0.033402666449546814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,128,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,fp8,0,0.062362665931383766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,0,1,fp8,fp8,0,0.058335999647776283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,float16,0,0.03330666571855545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,float16,0,0.0621066689491272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,128,1,fp8,fp8,0,0.03202133377393087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,fp8,0,0.06217599908510844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,0,1,fp8,fp8,0,0.05835733314355215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,float16,0,0.4583679835001628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,float16,0,0.6868853569030762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,fp8,0,0.46212267875671387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,128,1,fp8,fp8,0,0.4185546636581421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,fp8,0,0.6857439676920573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,0,1,fp8,fp8,0,0.6205600102742513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,float16,0,0.47973867257436115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,float16,0,0.7038346926371256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,fp8,0,0.4793493350346883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,128,1,fp8,fp8,0,0.4294026692708333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,fp8,0,0.7041599750518799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,0,1,fp8,fp8,0,0.630730668703715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,float16,0,0.04191466669241587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,float16,0,0.49172266324361164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,fp8,0,0.49109331766764325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,128,1,fp8,fp8,0,0.4415786663691203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,fp8,0,0.0746666689713796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,fp8,0,0.7148746649424235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,0,1,fp8,fp8,0,0.6443413496017456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,float16,0,0.38364267349243164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,128,1,fp8,fp8,0,0.2505706747372945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,fp8,0,0.38577067852020264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,float16,0,0.7162666320800781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,0,1,fp8,fp8,0,0.35790932178497314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,float16,0,0.26519999901453656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,float16,0,0.3468693494796753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,fp8,0,0.2283466657002767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,128,1,fp8,fp8,0,0.21433599789937338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,fp8,0,0.346560001373291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,0,1,fp8,fp8,0,0.31962666908899945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,float16,0,0.23722134033838907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,float16,0,0.35412267843882245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,fp8,0,0.23675199349721274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,128,1,fp8,fp8,0,0.22240533431371054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,float16,0,0.23071465889612833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,fp8,0,0.35452266534169513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,0,1,fp8,fp8,0,0.32872533798217773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,float16,0,0.36495999495188397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,fp8,0,0.26606400807698566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,128,1,fp8,fp8,0,0.2295893430709839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,fp8,0,0.36553064982096356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,0,1,fp8,fp8,0,0.3373066584269206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,float16,0,0.1393226683139801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,float16,0,0.20440532763799033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,float16,0,0.24617600440979004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,fp8,0,0.1402346690495809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,128,1,fp8,fp8,0,0.13562666376431784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,fp8,0,0.20754132668177286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,0,1,fp8,fp8,0,0.19445333878199259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,float16,0,0.11589333415031433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,float16,0,0.18108266592025757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,fp8,0,0.11762133240699768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,128,1,fp8,fp8,0,0.11070400476455688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,fp8,0,0.18378132581710815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,0,1,fp8,fp8,0,0.1705120007197062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,float16,0,0.11786666512489319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,float16,0,0.18593066930770874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,128,1,fp8,fp8,0,0.11621866623560588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,fp8,0,0.2485226591428121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,fp8,0,0.187717338403066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,0,1,fp8,fp8,0,0.17518399159113565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,float16,0,0.12548800309499106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,float16,0,0.19153600931167603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,fp8,0,0.12686933080355325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,128,1,fp8,fp8,0,0.12398933370908101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,0,1,fp8,fp8,0,0.1827733318010966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,float16,0,0.07337066531181335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,float16,0,0.11332799990971883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,fp8,0,0.07692266503969829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,128,1,fp8,fp8,0,0.07707733412583669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,fp8,0,0.1145919958750407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,0,1,fp8,fp8,0,0.11132799585660298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,float16,0,0.06877866884072621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,float16,0,0.10526399811108907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,fp8,0,0.07060266534487407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,128,1,fp8,fp8,0,0.06427733103434245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,fp8,0,0.10733333230018616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,0,1,fp8,fp8,0,0.09902399778366089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,float16,0,0.0687253326177597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,float16,0,0.10672533512115479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,fp8,0,0.07038400073846181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,128,1,fp8,fp8,0,0.0662773350874583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,fp8,0,0.10749333103497823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,0,1,fp8,fp8,0,0.10071999828020732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,float16,0,0.07116800049940745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,fp8,0,0.11960533261299133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,fp8,0,0.07251733541488647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,128,1,fp8,fp8,0,0.06790400048096974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,fp8,0,0.10919466614723206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,0,1,fp8,fp8,0,0.10153599580128987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,float16,0,0.04472533365090688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,float16,0,0.07231999933719635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,fp8,0,0.0461760014295578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,128,1,fp8,fp8,0,0.04557333389918009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,fp8,0,0.07414933542410533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,0,1,fp8,fp8,0,0.06853333115577698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,float16,0,0.04363733530044556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,float16,0,0.07032000025113423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,fp8,0,0.043882668018341064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,128,1,fp8,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,fp8,0,0.07049599786599477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,0,1,fp8,fp8,0,0.06410133341948192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,float16,0,0.04257600009441376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,float16,0,0.07061866422494252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,fp8,0,0.043978666265805565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,128,1,fp8,fp8,0,0.04154666761557261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,fp8,0,0.07035199801127116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,0,1,fp8,fp8,0,0.06429333488146464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,float16,0,0.04342400034268697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,float16,0,0.07054399947325389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,fp8,0,0.045781334241231285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,128,1,fp8,fp8,0,0.04188799858093262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,fp8,0,0.07249066730340321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,0,1,fp8,fp8,0,0.06650133430957794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,float16,0,0.03143999973932902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,float16,0,0.0481333335240682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,128,1,fp8,fp8,0,0.031445334355036415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,fp8,0,0.049679999550183616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,0,1,fp8,fp8,0,0.04622933268547058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,float16,0,0.029733332494894665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,float16,0,0.047637333472569786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,fp8,0,0.030991998811562855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,128,1,fp8,fp8,0,0.029135999580224354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,fp8,0,0.04807466765244802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,0,1,fp8,fp8,0,0.04365866879622141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,float16,0,0.029333333174387615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,float16,0,0.0476746658484141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,fp8,0,0.029706666866938274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,128,1,fp8,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,fp8,0,0.04776533444722494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,0,1,fp8,fp8,0,0.045642669002215065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,float16,0,0.029466666281223297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,float16,0,0.04820266862710317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,128,1,fp8,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,fp8,0,0.04933333396911621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,0,1,fp8,fp8,0,0.04589866598447164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,float16,0,0.029520000020662945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,float16,0,0.04604266583919525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,fp8,0,0.029264000554879505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,128,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,fp8,0,0.046021332343419395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,0,1,fp8,fp8,0,0.04390933116277059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,fp8,0,0.19302932421366373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,float16,0,0.04590400060017904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,128,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,0,1,fp8,fp8,0,0.04362666606903076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,float16,0,0.027658666173617046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,float16,0,0.045994664231936135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,128,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,fp8,0,0.04570133487383524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,0,1,fp8,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,float16,0,0.029365333418051403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,float16,0,0.04574933151404063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,128,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,fp8,0,0.045968001087506614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,0,1,fp8,fp8,0,0.04372266431649526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,float16,0,0.34094401200612384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,float16,0,0.4535786708196004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,float16,0,0.10757333040237427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,fp8,0,0.34160534540812176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,128,1,fp8,fp8,0,0.313973327477773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,fp8,0,0.4549280007680257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,0,1,fp8,fp8,0,0.41576532522837323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,float16,0,0.3556480010350545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,float16,0,0.4702506860097249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,fp8,0,0.4690186580022176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,0,1,fp8,fp8,0,0.42876799901326496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,float16,0,0.36797332763671875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,float16,0,0.48150400320688885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,fp8,0,0.369818647702535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,128,1,fp8,fp8,0,0.3385813236236572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,fp8,0,0.4795573155085246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,0,1,fp8,fp8,0,0.43858134746551514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,128,1,fp8,fp8,0,0.3263733386993408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,float16,0,0.26336532831192017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,128,1,fp8,fp8,0,0.19266132513682047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,fp8,0,0.2664586702982585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,0,1,fp8,fp8,0,0.24860266844431558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,float16,0,0.1715786655743917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,float16,0,0.20145599047342935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,fp8,0,0.17277334133783975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,128,1,fp8,fp8,0,0.16475199659665427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,fp8,0,0.23248533407847086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,0,1,fp8,fp8,0,0.21794666846593222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,float16,0,0.17817066113154092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,float16,0,0.23708800474802652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,fp8,0,0.17994133631388345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,128,1,fp8,fp8,0,0.17084266742070517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,fp8,0,0.23921066522598267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,0,1,fp8,fp8,0,0.22398932774861655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,float16,0,0.1867093245188395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,float16,0,0.24792534112930298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,fp8,0,0.18943999210993448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,128,1,fp8,fp8,0,0.1792373259862264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,fp8,0,0.25013333559036255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,0,1,fp8,fp8,0,0.23321066300074259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,float16,0,0.10532266894976298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,float16,0,0.14030399918556213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,fp8,0,0.35714133580525714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,fp8,0,0.10771200060844421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,128,1,fp8,fp8,0,0.106495996316274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,fp8,0,0.14248533050219217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,0,1,fp8,fp8,0,0.13732799887657166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,float16,0,0.09305066863695781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,float16,0,0.1258026659488678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,fp8,0,0.09470933675765991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,128,1,fp8,fp8,0,0.08480532964070638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,fp8,0,0.12583999832471213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,0,1,fp8,fp8,0,0.11577600240707397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,float16,0,0.09378666679064433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,float16,0,0.12665067116419473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,fp8,0,0.0974026620388031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,128,1,fp8,fp8,0,0.08904000123341878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,float16,0,0.23225067059199014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,0,1,fp8,fp8,0,0.1197119951248169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,float16,0,0.0974666674931844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,float16,0,0.13194666306177774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,fp8,0,0.09956266482671101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,128,1,fp8,fp8,0,0.0972213347752889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,fp8,0,0.133733332157135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,0,1,fp8,fp8,0,0.1262506643931071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,float16,0,0.06026133398214976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,float16,0,0.0804799993832906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,fp8,0,0.06266133487224579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,128,1,fp8,fp8,0,0.06063466767470042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,fp8,0,0.08288533488909404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,0,1,fp8,fp8,0,0.0788213312625885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,float16,0,0.0545066644748052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,float16,0,0.0763679991165797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,fp8,0,0.056159997979799904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,128,1,fp8,fp8,0,0.052239999175071716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,fp8,0,0.07944533228874207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,0,1,fp8,fp8,0,0.07049599786599477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,fp8,0,0.1302079955736796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,fp8,0,0.20573866367340088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,float16,0,0.05659199754397074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,128,1,fp8,fp8,0,0.05398400127887726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,fp8,0,0.07975466549396515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,0,1,fp8,fp8,0,0.0729013333717982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,float16,0,0.05726400017738342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,float16,0,0.07891733447710673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,fp8,0,0.058037335673967995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,float16,0,0.07852266728878021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,128,1,fp8,fp8,0,0.054661333560943604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,fp8,0,0.05755199988683065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,float16,0,0.041850666205088295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,float16,0,0.052560001611709595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,128,1,fp8,fp8,0,0.03949866692225138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,fp8,0,0.054832001527150474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,0,1,fp8,fp8,0,0.051962668697039284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,float16,0,0.04098133246103922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,float16,0,0.05190399785836538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,fp8,0,0.039664000272750854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,fp8,0,0.08107733229796092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,0,1,fp8,fp8,0,0.07533866663773854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,fp8,0,0.05190399785836538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,0,1,fp8,fp8,0,0.04809066653251648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,float16,0,0.03965866565704346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,float16,0,0.05180266499519348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,fp8,0,0.03963200002908707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,128,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,fp8,0,0.05209066470464071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,0,1,fp8,fp8,0,0.049135997891426086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,float16,0,0.04009599983692169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,float16,0,0.05187733471393585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,fp8,0,0.04178666571776072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,128,1,fp8,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,fp8,0,0.05418666700522105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,0,1,fp8,fp8,0,0.05009600023428599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,float16,0,0.029498666524887085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,float16,0,0.03962666789690653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,128,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,fp8,0,0.03993066648642222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,0,1,fp8,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,float16,0,0.02758399893840154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,float16,0,0.03830400109291077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,128,1,fp8,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,fp8,0,0.03994666785001755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,0,1,fp8,fp8,0,0.03779733429352442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,128,1,fp8,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,128,1,fp8,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,fp8,0,0.0395413339138031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,float16,0,0.039279999832312264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,128,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,float16,0,0.039349332451820374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,fp8,0,0.043098668257395424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,float16,0,0.025583999852339428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,float16,0,0.03799466788768768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,0,1,fp8,fp8,0,0.03886399914820989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,128,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,fp8,0,0.03856533269087473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,0,1,fp8,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,fp8,0,0.03997333347797394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,0,1,fp8,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,128,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,fp8,0,0.0374293327331543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,fp8,0,0.02754666656255722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,float16,0,0.027434666951497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,float16,0,0.039173332353432976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,128,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,float16,0,0.037477334340413414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,float16,0,0.03788266579310099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,0,1,fp8,fp8,0,0.03528533379236857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,128,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,fp8,0,0.03810133288304011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,0,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,float16,0,0.3946506579717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,0,1,fp8,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,float16,0,0.4656746784845988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,128,1,fp8,fp8,0,0.3555946747461955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,fp8,0,0.025685332715511322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,fp8,0,0.4626986583073934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,0,1,fp8,fp8,0,0.41895465056101483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,float16,0,0.3958933353424072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,float16,0,0.46885867913564044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,128,1,fp8,fp8,0,0.3635520140329997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,fp8,0,0.4684586524963379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,0,1,fp8,fp8,0,0.4280746777852376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,fp8,0,0.39022934436798096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,float16,0,0.4049760103225708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,float16,0,0.4771786530812581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,fp8,0,0.4044160048166911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,128,1,fp8,fp8,0,0.38230399290720624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,fp8,0,0.4782559871673584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,fp8,0,0.39394132296244305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,0,1,fp8,fp8,0,0.4455039898554484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,float16,0,0.2528640031814575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,fp8,0,0.212719996770223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,128,1,fp8,fp8,0,0.21202133099238077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,fp8,0,0.25299733877182007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,0,1,fp8,fp8,0,0.24605866273244223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,float16,0,0.20710933208465576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,float16,0,0.242576003074646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,fp8,0,0.20501333475112915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,128,1,fp8,fp8,0,0.18709866205851236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,fp8,0,0.24276266495386759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,0,1,fp8,fp8,0,0.22223466634750366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,float16,0,0.2081813414891561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,float16,0,0.2464266618092855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,fp8,0,0.2072426676750183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,128,1,fp8,fp8,0,0.19143466154734293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,fp8,0,0.24638400475184122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,0,1,fp8,fp8,0,0.22626133759816489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,float16,0,0.21285333236058554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,fp8,0,0.21225066979726157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,128,1,fp8,fp8,0,0.2000053326288859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,float16,0,0.21478400627772012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,fp8,0,0.2502773404121399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,0,1,fp8,fp8,0,0.23501867055892944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,float16,0,0.14061866203943887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,fp8,0,0.11648000280062358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,128,1,fp8,fp8,0,0.11775466799736023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,fp8,0,0.13939199844996134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,0,1,fp8,fp8,0,0.1360053320725759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,float16,0,0.1111306647459666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,float16,0,0.1323306659857432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,128,1,fp8,fp8,0,0.1032426655292511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,float16,0,0.25195733706156415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,fp8,0,0.13265066345532736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,0,1,fp8,fp8,0,0.12380799651145935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,float16,0,0.11754666765530904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,float16,0,0.13501333196957907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,fp8,0,0.11333866914113362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,128,1,fp8,fp8,0,0.10592533151308696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,0,1,fp8,fp8,0,0.12432533502578735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,fp8,0,0.11057066917419434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,float16,0,0.14013866583506265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,fp8,0,0.11807466546694438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,float16,0,0.11185600360234578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,128,1,fp8,fp8,0,0.11003200213114421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,fp8,0,0.13792533675829569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,0,1,fp8,fp8,0,0.12949867049853006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,float16,0,0.06624533236026764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,float16,0,0.07904533545176189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,fp8,0,0.06542400022347768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,128,1,fp8,fp8,0,0.06836266815662384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,fp8,0,0.07678933441638947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,0,1,fp8,fp8,0,0.07939200103282928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,float16,0,0.06285333136717479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,float16,0,0.07730666796366374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,fp8,0,0.0625600020090739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,128,1,fp8,fp8,0,0.059903999169667564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,fp8,0,0.07538133362929027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,0,1,fp8,fp8,0,0.0702453354994456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,float16,0,0.06444799900054932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,float16,0,0.07796800136566162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,fp8,0,0.06406933565934499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,128,1,fp8,fp8,0,0.062319998939832054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,fp8,0,0.07663466533025105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,0,1,fp8,fp8,0,0.070592001080513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,float16,0,0.06437333424886067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,float16,0,0.07815466821193695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,fp8,0,0.0664160003264745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,float16,0,0.11642133196194966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,fp8,0,0.07893866797288258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,0,1,fp8,fp8,0,0.07438933352629344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,float16,0,0.04160533348719279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,float16,0,0.051125332713127136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,fp8,0,0.04144533226887385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,128,1,fp8,fp8,0,0.03990400085846583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,fp8,0,0.0498986691236496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,0,1,fp8,fp8,0,0.04828799764315287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,float16,0,0.03955733279387156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,float16,0,0.04993600149949392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,fp8,0,0.13571733236312866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,fp8,0,0.05005866785844167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,0,1,fp8,fp8,0,0.04756266872088114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,float16,0,0.0517439991235733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,fp8,0,0.0415786678592364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,128,1,fp8,fp8,0,0.039781334499518074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,fp8,0,0.05082666873931885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,0,1,fp8,fp8,0,0.04789866507053375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,float16,0,0.04155733436346054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,float16,0,0.052101333936055504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,fp8,0,0.04155733436346054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,128,1,fp8,fp8,0,0.04167466859022776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,fp8,0,0.051114668448766075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,0,1,fp8,fp8,0,0.0483893354733785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,float16,0,0.027242665489514668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,float16,0,0.03365866591533025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,128,1,fp8,fp8,0,0.0394400010506312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,128,1,fp8,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,fp8,0,0.03369066615899404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,0,1,fp8,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,float16,0,0.033439998825391136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,128,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,fp8,0,0.031983998914559685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,0,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,float16,0,0.033674667278925575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,128,1,fp8,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,fp8,0,0.03158933420976003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,0,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,float16,0,0.026389333109060924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,float16,0,0.03376533339420954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,fp8,0,0.027679999669392902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,128,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,0,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,float16,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,128,1,fp8,fp8,0,0.02231466770172119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,0,1,fp8,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,128,1,fp8,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,0,1,fp8,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,float16,0,0.02147199958562851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,float16,0,0.029370665550231934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,0,1,fp8,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,128,1,fp8,fp8,0,0.06251200040181477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,0,1,fp8,fp8,0,0.029338667790095013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,float16,0,0.027701333165168762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,128,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,fp8,0,0.02916266769170761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,0,1,fp8,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,128,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,float16,0,0.0276053324341774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,128,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,fp8,0,0.02762666592995326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,0,1,fp8,fp8,0,0.02659733345111211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,float16,0,0.027658666173617046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,float16,0,0.021829334398110706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,0,1,fp8,fp8,0,0.025610665480295818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,fp8,0,0.02165333429972331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,128,1,fp8,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,float16,0,0.02849599967400233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,128,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,128,1,float16,float16,0,0.3811519940694173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,0,1,float16,float16,0,0.3890933195749919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,128,1,float16,fp8,0,0.3787999947865804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,128,1,fp8,fp8,0,0.34815998872121173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,0,1,float16,fp8,0,0.3853386640548706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,0,1,fp8,fp8,0,0.3526826699574788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,128,1,float16,float16,0,0.38606401284535724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,0,1,float16,float16,0,0.39426668485005695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,128,1,float16,fp8,0,0.38556798299153644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,128,1,fp8,fp8,0,0.35415999094645184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,0,1,float16,fp8,0,0.3917920192082723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,0,1,fp8,fp8,0,0.3576800028483073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,128,1,float16,float16,0,0.39602665106455487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,0,1,float16,float16,0,0.4033120075861613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,128,1,float16,fp8,0,0.39503467082977295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,128,1,fp8,fp8,0,0.3681120077768962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,0,1,float16,fp8,0,0.4037546714146932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,0,1,fp8,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,0,1,float16,float16,0,0.2123039960861206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,128,1,float16,fp8,0,0.2058560053507487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,128,1,fp8,fp8,0,0.20722132921218872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,0,1,float16,fp8,0,0.20958934227625528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,0,1,fp8,fp8,0,0.21013865868250528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,128,1,float16,float16,0,0.19963200887044272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,0,1,float16,float16,0,0.20456000169118246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,128,1,float16,fp8,0,0.19850667317708334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,128,1,fp8,fp8,0,0.18314667542775473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,0,1,float16,fp8,0,0.20156800746917725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,0,1,fp8,fp8,0,0.3747093280156453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,128,1,float16,float16,0,0.2023413379987081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,0,1,float16,float16,0,0.20595200856526694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,128,1,float16,fp8,0,0.20230400562286377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,128,1,fp8,fp8,0,0.1853546698888143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,0,1,float16,fp8,0,0.20389332373936972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,128,1,float16,float16,0,0.20988800128300986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,0,1,fp8,fp8,0,0.18886399269104004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,128,1,float16,float16,0,0.20784000555674234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,0,1,float16,float16,0,0.21084266901016235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,128,1,float16,fp8,0,0.20840533574422201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,128,1,fp8,fp8,0,0.19341333707173666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,0,1,float16,fp8,0,0.21006399393081665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,0,1,fp8,fp8,0,0.19608000914255777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,128,1,float16,float16,0,0.11561066905657451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,0,1,float16,float16,0,0.11780800422032674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,128,1,float16,fp8,0,0.11355732878049214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,128,1,fp8,fp8,0,0.11430399616559346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,0,1,fp8,fp8,0,0.18501333395640054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,0,1,fp8,fp8,0,0.11680000027020772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,128,1,float16,float16,0,0.10962667067845662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,0,1,float16,float16,0,0.1113866666952769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,128,1,float16,fp8,0,0.10988266269365947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,0,1,float16,fp8,0,0.10987200339635213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,0,1,fp8,fp8,0,0.1033066709836324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,0,1,float16,float16,0,0.11306666334470113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,128,1,float16,fp8,0,0.10989866654078166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,128,1,fp8,fp8,0,0.10371200243631999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,0,1,float16,fp8,0,0.11608533064524333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,0,1,float16,fp8,0,0.11179733276367188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,0,1,fp8,fp8,0,0.10538666447003682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,128,1,float16,float16,0,0.11369599898656209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,128,1,fp8,fp8,0,0.1013706624507904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,128,1,float16,fp8,0,0.11429333686828613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,128,1,fp8,fp8,0,0.10923199852307637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,128,1,float16,float16,0,0.11077866951624553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,0,1,float16,fp8,0,0.11624000469843547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,0,1,fp8,fp8,0,0.11090667049090068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,0,1,float16,float16,0,0.06477333108584087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,128,1,float16,fp8,0,0.06457066535949707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,128,1,fp8,fp8,0,0.06668266654014587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,0,1,float16,fp8,0,0.06446399788061778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,0,1,fp8,fp8,0,0.06750933329264323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,128,1,float16,float16,0,0.06239999830722809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,0,1,float16,float16,0,0.06241600215435028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,128,1,float16,fp8,0,0.06230400005976359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,128,1,fp8,fp8,0,0.05829333265622457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,0,1,float16,fp8,0,0.062352001667022705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,0,1,fp8,fp8,0,0.05840533475081126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,128,1,float16,float16,0,0.06234666705131531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,0,1,float16,float16,0,0.06247466802597046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,128,1,float16,fp8,0,0.06356266637643178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,128,1,fp8,fp8,0,0.05793599784374237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,0,1,float16,fp8,0,0.06434133152167003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,128,1,float16,float16,0,0.06286400059858958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,128,1,float16,float16,0,0.06411733229955037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,0,1,float16,float16,0,0.06434133152167003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,128,1,float16,fp8,0,0.06429333488146464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,128,1,fp8,fp8,0,0.06033066908518473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,0,1,float16,fp8,0,0.0650079995393753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,0,1,fp8,fp8,0,0.06042666733264923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,128,1,float16,float16,0,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,0,1,float16,float16,0,0.04243200023969015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,128,1,float16,fp8,0,0.04213866591453552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,128,1,fp8,fp8,0,0.041189332803090416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,0,1,float16,fp8,0,0.04350399971008301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,0,1,fp8,fp8,0,0.041536000867684685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,0,1,float16,float16,0,0.11743467052777608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,0,1,float16,float16,0,0.04186666508515676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,128,1,float16,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,128,1,fp8,fp8,0,0.03952533255020777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,0,1,float16,fp8,0,0.04161066561937332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,0,1,fp8,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,128,1,float16,float16,0,0.0414986660083135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,0,1,float16,float16,0,0.0423573354880015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,128,1,float16,fp8,0,0.041589332123597465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,128,1,fp8,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,0,1,float16,fp8,0,0.04181866844495138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,0,1,fp8,fp8,0,0.03990933299064636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,128,1,float16,float16,0,0.041802664597829185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,0,1,float16,float16,0,0.042133331298828125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,128,1,float16,fp8,0,0.04187199970086416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,128,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,0,1,float16,fp8,0,0.04204800228277842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,0,1,fp8,fp8,0,0.04161066561937332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,128,1,float16,float16,0,0.02757333219051361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,0,1,float16,float16,0,0.02754666656255722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,128,1,float16,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,128,1,fp8,fp8,0,0.027818667391935985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,0,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,0,1,fp8,fp8,0,0.027962667246659596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,128,1,float16,float16,0,0.025455998877684276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,0,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,128,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,128,1,fp8,fp8,0,0.025727999707063038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,0,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,128,1,float16,float16,0,0.027727998793125153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,0,1,float16,float16,0,0.02752533306678136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,128,1,float16,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,128,1,fp8,fp8,0,0.027104000250498455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,0,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,0,1,fp8,fp8,0,0.026821332673231762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,128,1,float16,float16,0,0.04009066770474116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,0,1,float16,float16,0,0.027589333554108936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,128,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,128,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,0,1,float16,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,0,1,fp8,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,128,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,0,1,float16,float16,0,0.02479466547568639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,128,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,128,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,0,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,0,1,fp8,fp8,0,0.023621333142121632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,128,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,0,1,float16,float16,0,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,128,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,128,1,fp8,fp8,0,0.021615999440352123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,0,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,128,1,float16,float16,0,0.023168000082174938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,0,1,float16,float16,0,0.02342933416366577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,128,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,128,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,0,1,fp8,fp8,0,0.05797333518664042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,128,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,128,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,0,1,float16,fp8,0,0.023898666103680927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,128,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,0,1,float16,float16,0,0.021407999098300934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,128,1,float16,fp8,0,0.02182399978240331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,128,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,0,1,float16,fp8,0,0.022330666581789654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,128,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,0,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,0,1,float16,float16,0,0.021551998953024547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,128,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,128,1,fp8,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,0,1,fp8,fp8,0,0.021536000072956085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,128,1,float16,float16,0,0.021589333812395733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,128,1,float16,fp8,0,0.02203733225663503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,128,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,128,1,float16,float16,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,128,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,128,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,0,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,0,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,128,1,float16,float16,0,0.02146666745344798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,0,1,float16,float16,0,0.02165333429972331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,128,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,0,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,128,1,fp8,fp8,0,0.019632000476121902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,0,1,float16,fp8,0,0.021946666141351063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,0,1,fp8,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,128,1,float16,float16,0,0.0195573332409064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,128,1,float16,fp8,0,0.02162666618824005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,128,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,0,1,fp8,fp8,0,0.021536000072956085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,128,1,float16,float16,0,0.021583999196688335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,0,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,128,1,fp8,fp8,0,0.01960533360640208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,128,1,float16,float16,0,0.01989866668979327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,0,1,float16,float16,0,0.021530665457248688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,128,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,128,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,0,1,float16,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,128,1,float16,float16,0,0.1858933369318644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,128,1,float16,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,128,1,float16,fp8,0,0.1848213275273641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,128,1,fp8,fp8,0,0.17098132769266763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,0,1,float16,fp8,0,0.18121600151062012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,0,1,fp8,fp8,0,0.16662399967511496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,128,1,float16,float16,0,0.1873813271522522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,0,1,float16,float16,0,0.18463999032974243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,128,1,float16,fp8,0,0.1855093240737915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,128,1,fp8,fp8,0,0.170522669951121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,0,1,float16,float16,0,0.18358399470647177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,0,1,float16,fp8,0,0.18301333983739218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,0,1,fp8,fp8,0,0.1674826741218567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,128,1,float16,float16,0,0.19338667392730713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,0,1,float16,float16,0,0.1900213360786438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,128,1,float16,fp8,0,0.19353065888086954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,128,1,fp8,fp8,0,0.1797599991162618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,0,1,float16,fp8,0,0.18931732575098673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,128,1,float16,float16,0,0.10539733370145161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,0,1,float16,float16,0,0.10539733370145161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,128,1,float16,fp8,0,0.10558399558067322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,128,1,fp8,fp8,0,0.10565867026646932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,0,1,float16,float16,0,0.021898667017618816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,0,1,float16,fp8,0,0.10355732838312785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,0,1,fp8,fp8,0,0.1016373336315155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,128,1,float16,float16,0,0.10035733381907146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,0,1,float16,float16,0,0.0972160001595815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,128,1,float16,fp8,0,0.09919466574986775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,128,1,fp8,fp8,0,0.09321066737174988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,0,1,float16,fp8,0,0.09707199533780415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,0,1,fp8,fp8,0,0.09098666906356812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,128,1,float16,float16,0,0.10153067111968994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,0,1,float16,float16,0,0.09942400455474854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,128,1,float16,fp8,0,0.10130666693051656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,128,1,fp8,fp8,0,0.09657599528630574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,0,1,float16,fp8,0,0.09860799709955852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,0,1,fp8,fp8,0,0.09335466225941975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,128,1,float16,float16,0,0.10531199971834819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,0,1,float16,float16,0,0.10320533315340678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,128,1,float16,fp8,0,0.1049173374970754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,128,1,fp8,fp8,0,0.09905067086219788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,0,1,float16,fp8,0,0.10296000043551128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,0,1,fp8,fp8,0,0.09727999567985535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,128,1,float16,float16,0,0.060559997955958046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,0,1,float16,float16,0,0.058821335434913635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,128,1,float16,fp8,0,0.05881066620349884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,0,1,float16,fp8,0,0.05840000013510386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,0,1,fp8,fp8,0,0.05994133154551188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,0,1,float16,float16,0,0.0583840012550354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,128,1,float16,fp8,0,0.05829333265622457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,128,1,fp8,fp8,0,0.05436266462008158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,0,1,float16,fp8,0,0.056890666484832764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,0,1,fp8,fp8,0,0.052095999320348106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,128,1,fp8,fp8,0,0.06213866670926412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,0,1,float16,float16,0,0.05850133299827576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,128,1,float16,fp8,0,0.058559998869895935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,128,1,fp8,fp8,0,0.05436799923578898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,0,1,float16,fp8,0,0.056032001972198486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,0,1,fp8,fp8,0,0.05216533442338308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,0,1,fp8,fp8,0,0.1773279905319214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,0,1,float16,float16,0,0.05991466840108236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,128,1,float16,float16,0,0.058335999647776283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,128,1,fp8,fp8,0,0.056703999638557434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,0,1,float16,fp8,0,0.05819733440876007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,0,1,fp8,fp8,0,0.05637866755326589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,128,1,float16,float16,0,0.03807466725508372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,0,1,float16,float16,0,0.03774400055408478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,128,1,float16,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,128,1,fp8,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,0,1,float16,fp8,0,0.03722133239110311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,128,1,float16,fp8,0,0.060266668597857155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,0,1,fp8,fp8,0,0.037632000943024956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,128,1,float16,float16,0,0.05839466551939646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,0,1,float16,float16,0,0.03728000074625015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,128,1,float16,fp8,0,0.03797333439191183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,128,1,fp8,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,0,1,float16,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,0,1,fp8,fp8,0,0.03381866713364919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,128,1,float16,float16,0,0.03763733307520548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,0,1,float16,float16,0,0.03584533433119456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,128,1,float16,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,128,1,fp8,fp8,0,0.03555733213822047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,128,1,float16,float16,0,0.06029866635799408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,128,1,float16,float16,0,0.03755199909210205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,0,1,float16,float16,0,0.0376800000667572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,128,1,float16,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,128,1,fp8,fp8,0,0.03781333317359289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,0,1,float16,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,0,1,fp8,fp8,0,0.03603733330965042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,128,1,float16,float16,0,0.03763733307520548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,0,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,128,1,fp8,fp8,0,0.026485333840052288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,0,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,0,1,fp8,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,128,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,0,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,128,1,float16,fp8,0,0.02568000058333079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,128,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,0,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,0,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,128,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,128,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,0,1,fp8,fp8,0,0.02380799998839696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,128,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,0,1,float16,float16,0,0.025536000728607178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,128,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,128,1,fp8,fp8,0,0.026730666557947796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,0,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,0,1,fp8,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,128,1,float16,float16,0,0.020970667401949566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,0,1,float16,float16,0,0.021295999487241108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,128,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,128,1,float16,float16,0,0.02033599962790807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,0,1,float16,float16,0,0.021029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,128,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,128,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,0,1,float16,fp8,0,0.020506666352351505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,128,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,0,1,float16,float16,0,0.02145066608985265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,128,1,fp8,fp8,0,0.020096000283956528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,0,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,0,1,fp8,fp8,0,0.035445332527160645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,128,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,128,1,float16,fp8,0,0.020768000433842342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,128,1,fp8,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,128,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,128,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,128,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,128,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,128,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,128,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,0,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,128,1,float16,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,0,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,0,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,128,1,float16,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,128,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,0,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,0,1,float16,float16,0,0.02013333390156428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,128,1,float16,float16,0,0.01844266677896182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,128,1,float16,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,128,1,float16,float16,0,0.0189280000825723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,128,1,fp8,fp8,0,0.01810666670401891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,128,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,128,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,0,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,0,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,0,1,float16,fp8,0,0.018277333428462345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,128,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,128,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,0,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,128,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,128,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,128,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,128,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,0,1,float16,fp8,0,0.01828266680240631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,0,1,fp8,fp8,0,0.018101333330074947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,128,1,float16,float16,0,0.10416000088055928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,0,1,float16,float16,0,0.10545600454012553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,128,1,float16,fp8,0,0.10317867000897725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,128,1,fp8,fp8,0,0.09711999694506328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,0,1,float16,float16,0,0.018858666221300762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,0,1,float16,fp8,0,0.10514666636784871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,128,1,float16,float16,0,0.10729599992434184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,0,1,float16,float16,0,0.10528533657391866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,128,1,float16,fp8,0,0.1055413285891215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,128,1,fp8,fp8,0,0.09956266482671101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,0,1,float16,fp8,0,0.10532800356547038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,0,1,fp8,fp8,0,0.09813867012659709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,128,1,float16,float16,0,0.10995733737945557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,0,1,fp8,fp8,0,0.09717333316802979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,0,1,float16,float16,0,0.11108799775441487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,128,1,float16,fp8,0,0.10865599910418193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,0,1,float16,fp8,0,0.01966933285196622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,0,1,float16,fp8,0,0.1081119974454244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,0,1,fp8,fp8,0,0.10386133193969727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,128,1,float16,float16,0,0.06007466713587443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,0,1,float16,float16,0,0.06041066845258077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,128,1,float16,fp8,0,0.0603413333495458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,128,1,fp8,fp8,0,0.061039999127388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,0,1,float16,fp8,0,0.05898133416970571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,0,1,fp8,fp8,0,0.0637546678384145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,0,1,float16,float16,0,0.05845333139101664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,128,1,fp8,fp8,0,0.10467200477917989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,128,1,fp8,fp8,0,0.054272000988324486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,0,1,float16,fp8,0,0.058517331878344216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,0,1,fp8,fp8,0,0.054474666714668274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,128,1,float16,float16,0,0.058261334896087646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,0,1,float16,float16,0,0.058789332707722984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,128,1,float16,fp8,0,0.05825600028038025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,128,1,float16,float16,0,0.05832533538341522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,0,1,float16,fp8,0,0.05793066819508871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,0,1,fp8,fp8,0,0.05417599777380625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,128,1,float16,float16,0,0.06014933188756307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,128,1,float16,fp8,0,0.05819733440876007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,128,1,float16,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,128,1,fp8,fp8,0,0.057029331723848976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,0,1,float16,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,0,1,fp8,fp8,0,0.057999998331069946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,0,1,float16,float16,0,0.03958400090535482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,128,1,float16,fp8,0,0.03957866628964742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,128,1,fp8,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,0,1,float16,fp8,0,0.03904533386230469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,0,1,fp8,fp8,0,0.03779733429352442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,0,1,float16,float16,0,0.06005333364009857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,128,1,float16,float16,0,0.038160001238187156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,0,1,float16,float16,0,0.03800000001986822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,128,1,float16,fp8,0,0.037920000652472176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,128,1,fp8,fp8,0,0.03548266738653183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,128,1,float16,float16,0,0.039434666434923805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,0,1,fp8,fp8,0,0.03573866685231527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,128,1,float16,float16,0,0.03931200007597605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,0,1,float16,float16,0,0.03746666759252548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,128,1,float16,fp8,0,0.03961600114901861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,128,1,fp8,fp8,0,0.03714133302370707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,128,1,fp8,fp8,0,0.05415999889373779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,0,1,fp8,fp8,0,0.037050666908423104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,128,1,float16,float16,0,0.03993066648642222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,0,1,float16,float16,0,0.040005333721637726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,128,1,float16,fp8,0,0.03803733239571253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,0,1,float16,fp8,0,0.03817066550254822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,0,1,fp8,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,128,1,float16,float16,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,0,1,float16,float16,0,0.02640533447265625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,128,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,0,1,float16,fp8,0,0.039701332648595176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,128,1,fp8,fp8,0,0.02606400102376938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,0,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,0,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,128,1,float16,float16,0,0.026186667382717133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,0,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,128,1,float16,fp8,0,0.025642665723959606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,128,1,fp8,fp8,0,0.023541333774725597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,0,1,float16,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,0,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,128,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,0,1,float16,float16,0,0.025248001019159954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,128,1,float16,fp8,0,0.025519999365011852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,128,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,0,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,0,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,128,1,float16,float16,0,0.02722666660944621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,0,1,float16,float16,0,0.02516799916823705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,128,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,128,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,0,1,float16,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,0,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,128,1,fp8,fp8,0,0.038149334490299225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,128,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,0,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,128,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,128,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,128,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,128,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,128,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,0,1,float16,fp8,0,0.039493332306543984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,0,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,0,1,float16,float16,0,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,128,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,128,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,128,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,128,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,128,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,0,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,128,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,0,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,128,1,float16,float16,0,0.015552000453074774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,0,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,128,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,128,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,128,1,fp8,fp8,0,0.017717332889636356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,128,1,float16,fp8,0,0.016234666109085083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,128,1,fp8,fp8,0,0.01573333392540614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,0,1,float16,fp8,0,0.016602666427691776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,128,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,128,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,128,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,0,1,fp8,fp8,0,0.01647466669480006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,128,1,float16,float16,0,0.015775999675194424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,0,1,float16,float16,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,128,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,128,1,fp8,fp8,0,0.016208000481128693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,0,1,fp8,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,0,1,float16,float16,0,0.015882667154073715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,128,1,fp8,fp8,0,0.016069332758585613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,128,1,fp8,fp8,0,0.01611199975013733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,0,1,fp8,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,0,1,float16,float16,0,0.07705066601435344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,128,1,float16,fp8,0,0.016682667036851246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,128,1,float16,fp8,0,0.07690666615962982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,128,1,fp8,fp8,0,0.07036266724268596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,0,1,float16,fp8,0,0.07688533266385396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,128,1,float16,float16,0,0.07659199833869934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,128,1,float16,float16,0,0.07786666850248973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,0,1,float16,float16,0,0.07678933441638947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,128,1,float16,fp8,0,0.07691200077533722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,128,1,fp8,fp8,0,0.07001600166161855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,0,1,float16,fp8,0,0.0765226682027181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,0,1,fp8,fp8,0,0.07067733506361644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,128,1,float16,float16,0,0.07866133252779643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,0,1,float16,float16,0,0.07879999776681264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,128,1,float16,fp8,0,0.07808533310890198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,128,1,fp8,fp8,0,0.07276266813278198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,0,1,float16,fp8,0,0.0783733328183492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,0,1,fp8,fp8,0,0.07232533395290375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,128,1,float16,float16,0,0.04773866633574168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,128,1,float16,fp8,0,0.04769066472848257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,0,1,float16,float16,0,0.048570667703946434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,128,1,fp8,fp8,0,0.04598399996757507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,0,1,float16,fp8,0,0.04784533381462097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,0,1,fp8,fp8,0,0.045978665351867676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,128,1,float16,float16,0,0.04780800143877665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,0,1,float16,float16,0,0.04629333317279816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,128,1,float16,fp8,0,0.04693866769472758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,128,1,fp8,fp8,0,0.0421973317861557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,0,1,float16,fp8,0,0.04618666569391886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,0,1,fp8,fp8,0,0.04383466641108195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,128,1,float16,float16,0,0.04576533536116282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,0,1,float16,float16,0,0.04596266647179922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,128,1,float16,fp8,0,0.046021332343419395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,128,1,fp8,fp8,0,0.04197866717974345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,0,1,float16,fp8,0,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,0,1,fp8,fp8,0,0.04304533203442892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,128,1,float16,float16,0,0.046810666720072426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,0,1,float16,float16,0,0.04598933458328247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,128,1,float16,fp8,0,0.047797332207361855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,128,1,fp8,fp8,0,0.04558399816354116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,0,1,float16,fp8,0,0.04629333317279816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,0,1,fp8,fp8,0,0.04587199787298838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,0,1,float16,float16,0,0.03164800008138021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,128,1,float16,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,128,1,fp8,fp8,0,0.030821333328882854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,0,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,0,1,fp8,fp8,0,0.030938667555650074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,128,1,float16,float16,0,0.031301334500312805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,0,1,float16,float16,0,0.03136000037193298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,128,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,128,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,0,1,float16,fp8,0,0.030074665943781536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,0,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,128,1,float16,float16,0,0.03102933367093404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,0,1,float16,float16,0,0.03170666595300039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,128,1,float16,fp8,0,0.03178666780392329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,128,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,0,1,float16,fp8,0,0.03180799881617228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,128,1,float16,float16,0,0.03143466760714849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,128,1,float16,float16,0,0.03139200061559677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,0,1,float16,float16,0,0.03126399964094162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,128,1,float16,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,128,1,fp8,fp8,0,0.029818666477998097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,0,1,float16,fp8,0,0.031680000325044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,0,1,fp8,fp8,0,0.03057066599527995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,128,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,0,1,float16,float16,0,0.0216799999276797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,128,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,128,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,128,1,float16,float16,0,0.022255999346574146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,128,1,float16,fp8,0,0.02290133386850357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,128,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,0,1,fp8,fp8,0,0.031189332405726116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,128,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,128,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,128,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,0,1,float16,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,128,1,float16,float16,0,0.022639999787012737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,0,1,float16,float16,0,0.021776000658671062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,128,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,128,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,0,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,0,1,fp8,fp8,0,0.02162133405605952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,128,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,128,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,0,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,128,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,128,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,0,1,fp8,fp8,0,0.07051733136177063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,128,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,0,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,0,1,fp8,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,128,1,float16,float16,0,0.017711999515692394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,128,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,128,1,float16,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,128,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,128,1,float16,fp8,0,0.01609066625436147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,0,1,float16,fp8,0,0.01598400001724561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,128,1,float16,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,0,1,float16,fp8,0,0.017727999637524288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,128,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,128,1,fp8,fp8,0,0.017887999614079792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,0,1,fp8,fp8,0,0.017658667018016178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,128,1,float16,float16,0,0.01754666616519292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,128,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,128,1,fp8,fp8,0,0.015722667177518208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,0,1,float16,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,0,1,fp8,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,128,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,0,1,float16,float16,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,128,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,0,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,0,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,128,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,128,1,float16,float16,0,0.015674666812022526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,128,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,128,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,0,1,float16,float16,0,0.01758933315674464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,128,1,float16,float16,0,0.06477866570154826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,0,1,float16,float16,0,0.06486399968465169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,128,1,float16,fp8,0,0.0652213344971339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,128,1,fp8,fp8,0,0.06054399907588959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,0,1,fp8,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,0,1,float16,fp8,0,0.0650133341550827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,0,1,fp8,fp8,0,0.06053866446018219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,128,1,float16,float16,0,0.06670400003592174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,0,1,float16,float16,0,0.06515733400980632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,128,1,float16,fp8,0,0.06657066444555919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,128,1,fp8,fp8,0,0.05933333436648051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,0,1,float16,fp8,0,0.06426666676998138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,0,1,fp8,fp8,0,0.06038400034109751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,128,1,float16,float16,0,0.06484266618887584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,0,1,float16,float16,0,0.06610133250554402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,128,1,float16,fp8,0,0.06672533353169759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,128,1,fp8,fp8,0,0.06238399942715963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,0,1,float16,fp8,0,0.06498666604359944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,0,1,fp8,fp8,0,0.0621066689491272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,0,1,float16,float16,0,0.03976533313592275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,128,1,float16,fp8,0,0.03991466760635376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,128,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,0,1,float16,fp8,0,0.040021332601706185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,0,1,fp8,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,128,1,float16,float16,0,0.03789866715669632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,0,1,float16,float16,0,0.03791466603676478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,128,1,float16,fp8,0,0.03804266701141993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,128,1,fp8,fp8,0,0.03585600107908249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,0,1,float16,fp8,0,0.03859733293453852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,0,1,fp8,fp8,0,0.035877334574858345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,128,1,float16,float16,0,0.037871999045213066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,0,1,float16,float16,0,0.03998400022586187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,128,1,float16,fp8,0,0.03965333352486292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,128,1,fp8,fp8,0,0.036415999134381614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,0,1,float16,fp8,0,0.039808000127474465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,0,1,fp8,fp8,0,0.03585600107908249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,128,1,float16,float16,0,0.039493332306543984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,0,1,float16,float16,0,0.03914133210976919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,128,1,float16,fp8,0,0.039488000174363456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,128,1,fp8,fp8,0,0.037434667348861694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,0,1,float16,fp8,0,0.039893334110577904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,0,1,fp8,fp8,0,0.03788800040880839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,128,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,0,1,float16,float16,0,0.027376001079877216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,128,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,128,1,fp8,fp8,0,0.026485333840052288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,0,1,fp8,fp8,0,0.025978667040665943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,128,1,float16,float16,0,0.026362667481104534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,128,1,float16,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,128,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,0,1,float16,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,0,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,128,1,float16,float16,0,0.02644266684850057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,0,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,128,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,128,1,fp8,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,0,1,float16,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,0,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,128,1,float16,float16,0,0.027045334378878277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,0,1,float16,float16,0,0.02554133286078771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,128,1,float16,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,128,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,0,1,float16,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,0,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,128,1,float16,float16,0,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,128,1,float16,fp8,0,0.01960533360640208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,128,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,0,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,0,1,float16,float16,0,0.020693333198626835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,128,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,128,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,0,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,128,1,float16,fp8,0,0.019866666446129482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,0,1,float16,fp8,0,0.019679999599854153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,128,1,float16,float16,0,0.020901332298914593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,0,1,float16,float16,0,0.020309332758188248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,128,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,128,1,float16,float16,0,0.03989866624275843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,0,1,fp8,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,128,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,128,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,128,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,128,1,fp8,fp8,0,0.015930666277805965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,0,1,fp8,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,128,1,float16,float16,0,0.017514667163292568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,128,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,128,1,float16,float16,0,0.015935999651749928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,128,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,0,1,float16,fp8,0,0.02777066578467687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,0,1,float16,float16,0,0.016586666305859882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,128,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,0,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,0,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,128,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,128,1,fp8,fp8,0,0.01600533351302147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,0,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,128,1,float16,float16,0,0.01655999943614006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,128,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,0,1,float16,float16,0,0.016261332978804905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,128,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,0,1,fp8,fp8,0,0.016373333831628162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,128,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,128,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,128,1,float16,float16,0,0.015642666568358738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,0,1,fp8,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,128,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,128,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,128,1,float16,fp8,0,0.014762666076421738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,0,1,fp8,fp8,0,0.01607999950647354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,128,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,0,1,fp8,fp8,0,0.01588800052801768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,0,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,0,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,128,1,float16,fp8,0,0.016544000556071598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,128,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,128,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,128,1,float16,float16,0,0.05840000013510386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,0,1,float16,float16,0,0.05682133138179779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,128,1,float16,fp8,0,0.05797866483529409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,128,1,fp8,fp8,0,0.052576000491778054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,0,1,float16,fp8,0,0.05630399783452352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,0,1,fp8,fp8,0,0.05213333169619242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,128,1,float16,float16,0,0.057855998476346336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,0,1,float16,float16,0,0.056218668818473816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,128,1,float16,fp8,0,0.058176000912984215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,128,1,fp8,fp8,0,0.053743998209635414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,0,1,float16,fp8,0,0.058037335673967995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,0,1,fp8,fp8,0,0.054192001620928444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,128,1,float16,float16,0,0.058677335580190025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,0,1,float16,float16,0,0.05823466678460439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,128,1,float16,fp8,0,0.05855466425418854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,128,1,fp8,fp8,0,0.0544106662273407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,0,1,float16,fp8,0,0.05858133236567179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,0,1,fp8,fp8,0,0.0540533314148585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,128,1,float16,float16,0,0.03565866748491923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,0,1,float16,float16,0,0.03365866591533025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,128,1,float16,fp8,0,0.03555200000603994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,128,1,fp8,fp8,0,0.033402666449546814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,0,1,float16,fp8,0,0.03532266616821289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,0,1,fp8,fp8,0,0.033370666205883026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,128,1,float16,float16,0,0.03540800015131632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,0,1,float16,float16,0,0.03341866781314214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,128,1,float16,fp8,0,0.03576533248027166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,128,1,fp8,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,0,1,float16,float16,0,0.035546667873859406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,128,1,float16,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,128,1,fp8,fp8,0,0.03137599925200144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,0,1,float16,fp8,0,0.03372266640265783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,128,1,float16,float16,0,0.015765332927306492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,0,1,fp8,fp8,0,0.03156266609827677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,0,1,float16,float16,0,0.03346133232116699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,128,1,float16,fp8,0,0.03551999976237615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,128,1,fp8,fp8,0,0.03326933334271113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,0,1,float16,fp8,0,0.03565333286921183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,0,1,fp8,fp8,0,0.03378133227427801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,0,1,fp8,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,0,1,float16,float16,0,0.02493866781393687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,128,1,float16,float16,0,0.03565333286921183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,128,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,0,1,fp8,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,128,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,0,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,128,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,128,1,fp8,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,0,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,0,1,fp8,fp8,0,0.025546667476495106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,128,1,float16,float16,0,0.025424001117547352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,0,1,float16,float16,0,0.024293333292007446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,128,1,float16,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,128,1,fp8,fp8,0,0.024090667565663654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,0,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,0,1,fp8,fp8,0,0.024133334557215374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,128,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,128,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,128,1,fp8,fp8,0,0.02473066747188568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,0,1,float16,fp8,0,0.025621332228183746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,128,1,float16,float16,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,128,1,float16,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,0,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,128,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,128,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,128,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,128,1,float16,fp8,0,0.01958400011062622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,128,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,0,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,128,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,0,1,float16,fp8,0,0.03502399971087774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,128,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,0,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,0,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,128,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,0,1,float16,fp8,0,0.016623999923467636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,128,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,128,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,128,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,128,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,0,1,fp8,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,0,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,128,1,float16,float16,0,0.015978666643301647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,0,1,float16,float16,0,0.016336000214020412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,0,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,128,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,128,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,128,1,float16,float16,0,0.015728000551462173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,128,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,0,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,0,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,128,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,0,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,0,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,128,1,float16,float16,0,0.015989333391189575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,128,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,128,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,128,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,0,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,128,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,0,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,0,0.051551997661590576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,0,0.05198933184146881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,0,0.05186666548252106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,128,1,fp8,fp8,0,0.047055999437967934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,0,0.05218133330345154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,0,0.05016533533732096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,0,0.05207466582457224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,0,0.049925332268079124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,128,1,fp8,fp8,0,0.04785599807898203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,0,0.050293331344922386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,64,0,1,fp8,fp8,0,0.048138668139775596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,0,0.05203733344872793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,0,0.052069331208864846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,0,0.051957334081331887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,128,1,fp8,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,0,0.05206400156021118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,64,0,1,fp8,fp8,0,0.04781866570313772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,0,0.031632001201311745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,128,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,128,1,fp8,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,0,1,fp8,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,0,0.03233066697915395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,0,0.03146133323510488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,64,0,1,fp8,fp8,0,0.04756266872088114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,0,0.03148266673088074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,128,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,0,0.03330666571855545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,64,0,1,fp8,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,0,0.032069332897663116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,0,0.03335466732581457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,0,0.03183466692765554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,128,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,64,0,1,fp8,fp8,0,0.031173333525657654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,0,0.03156800071398417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,0,0.03166399896144867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,0,0.03341866781314214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,128,1,fp8,fp8,0,0.03143999973932902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,0,0.031680000325044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,64,0,1,fp8,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,128,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,128,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,0,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,128,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,64,0,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,0,0.02367999901374181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,0,0.02510400116443634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,128,1,fp8,fp8,0,0.02347733328739802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,64,0,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,0,0.02363733450571696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,0,0.023578666150569916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,128,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,64,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,0,0.020799999435742695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,128,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,128,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,64,128,1,float16,float16,0,0.023599999646345775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,128,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,64,0,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,128,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,128,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,64,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,0,0.015850666910409927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,128,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,128,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,128,1,fp8,fp8,0,0.016341333587964375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,64,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,0,0.016757333030303318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,128,1,fp8,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,128,1,fp8,fp8,0,0.0161920003592968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,128,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,64,0,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,64,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,0,0.016575999557971954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,64,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,64,0,1,fp8,fp8,0,0.017770666629076004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,128,1,float16,fp8,0,0.01782400036851565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,64,128,1,float16,float16,0,0.03329599897066752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,0,0.015919999529918034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,128,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,64,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,64,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,64,128,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,128,1,fp8,fp8,0,0.015605332950750986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,0,1,fp8,fp8,0,0.01580799991885821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,0,0.015477333217859268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,0,0.016149333367745083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,128,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,0,0.01607999950647354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,64,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,64,0,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,fp8,0,0.23954667647679648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,float16,0,1.4844053586324055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,fp8,0,1.4845493634541829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,float16,0,0.23706666628519693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,0,1,fp8,fp8,0,1.3599467277526855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,float16,0,0.25064533948898315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,fp8,0,0.25498666365941364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,128,1,fp8,fp8,0,0.2427519957224528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,128,1,fp8,fp8,0,0.22527466217676798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,float16,0,0.14387200276056925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,fp8,0,1.5010612805684407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,float16,0,0.8236479759216309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,128,1,fp8,fp8,0,0.14009066422780356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,float16,0,1.4965972900390625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,fp8,0,0.826469341913859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,float16,0,0.1276586651802063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,fp8,0,0.14629866679509482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,float16,0,0.8062240282694498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,128,1,fp8,fp8,0,0.12261866529782613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,fp8,0,0.8077013492584229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,0,1,fp8,fp8,0,0.7415680090586344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,fp8,0,0.12780267000198364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,float16,0,0.13219199577967325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,fp8,0,0.13553067048390707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,128,1,fp8,fp8,0,0.13009066383043924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,fp8,0,0.8135200341542562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,0,1,fp8,fp8,0,0.7474239667256674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,float16,0,0.08374399940172832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,0,1,fp8,fp8,0,0.7585279941558838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,fp8,0,0.08691733082135518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,128,1,fp8,fp8,0,0.0846453309059143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,fp8,0,0.4793813228607178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,float16,0,0.8099040190378824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,float16,0,0.08074133098125458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,0,1,fp8,fp8,0,1.3716746966044109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,fp8,0,0.08107199768225352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,128,1,fp8,fp8,0,0.07487466434637706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,fp8,0,0.47279465198516846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,0,1,fp8,fp8,0,0.4416693449020386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,float16,0,0.08080000181992848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,float16,0,0.4774453242619832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,fp8,0,0.08272000153859456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,128,1,fp8,fp8,0,0.07869866490364075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,fp8,0,0.47539734840393066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,0,1,fp8,fp8,0,0.43114666144053143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,float16,0,0.06426666676998138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,float16,0,0.4726719856262207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,float16,0,0.3185759981473287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,128,1,fp8,fp8,0,0.062080000837643944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,fp8,0,0.318234662214915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,0,1,fp8,fp8,0,0.2918986678123474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,0,1,fp8,fp8,0,0.4349813461303711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,float16,0,0.06427200138568878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,fp8,0,0.06435733536879222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,fp8,0,0.06400533517201741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,128,1,fp8,fp8,0,0.06191466748714447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,fp8,0,0.31732799609502155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,0,1,fp8,fp8,0,0.28972800572713214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,float16,0,0.06461333235104878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,float16,0,0.3162720004717509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,fp8,0,0.06425066788991292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,float16,0,0.31658132870992023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,128,1,fp8,fp8,0,0.06132266422112783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,fp8,0,0.31727466980616253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,0,1,fp8,fp8,0,0.29014400641123456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,float16,0,0.18145066499710083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,fp8,0,0.18375466267267862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,128,1,fp8,fp8,0,0.17170133193333945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,float16,0,0.4719093243281047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,fp8,0,0.8988640308380127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,0,1,fp8,fp8,0,0.821610689163208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,float16,0,0.19274133443832397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,float16,0,0.8963733514149984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,fp8,0,0.19217065970102945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,128,1,fp8,fp8,0,0.18236800034840903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,float16,0,0.11206400394439697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,fp8,0,0.9078826904296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,float16,0,0.5112640062967936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,128,1,fp8,fp8,0,0.11171199878056844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,fp8,0,0.5146666765213013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,0,1,fp8,fp8,0,0.47436801592508954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,0,1,fp8,fp8,0,0.8337120215098063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,float16,0,0.10129599769910176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,fp8,0,0.11563199758529663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,fp8,0,0.10141866405804952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,128,1,fp8,fp8,0,0.09515733520189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,fp8,0,0.4992479880650838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,0,1,fp8,fp8,0,0.4552319844563802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,float16,0,0.10335999727249146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,fp8,0,0.1067733367284139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,128,1,fp8,fp8,0,0.10156800349553426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,float16,0,0.4983839988708496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,fp8,0,0.5023733377456665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,float16,0,0.06608533362547557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,float16,0,0.9067200024922689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,float16,0,0.30501866340637207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,fp8,0,0.06834666430950165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,128,1,fp8,fp8,0,0.06629866858323415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,fp8,0,0.3061973253885905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,0,1,fp8,fp8,0,0.28351465861002606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,float16,0,0.06217599908510844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,float16,0,0.30351465940475464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,fp8,0,0.06340266764163971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,128,1,fp8,fp8,0,0.06035733222961426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,0,1,fp8,fp8,0,0.4635626475016276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,fp8,0,0.3081120053927104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,0,1,fp8,fp8,0,0.2773066759109497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,float16,0,0.303439994653066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,float16,0,0.5004800160725912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,128,1,fp8,fp8,0,0.0621919979651769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,fp8,0,0.3036479949951172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,0,1,fp8,fp8,0,0.27962666749954224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,float16,0,0.054527997970581055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,float16,0,0.2076853315035502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,fp8,0,0.055946667989095054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,128,1,fp8,fp8,0,0.0518506666024526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,fp8,0,0.20786132415135702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,float16,0,0.06464000046253204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,float16,0,0.05453333258628845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,fp8,0,0.06439466774463654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,float16,0,0.20759999752044678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,128,1,fp8,fp8,0,0.052000001072883606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,fp8,0,0.2076853315035502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,0,1,fp8,fp8,0,0.19144533077875772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,float16,0,0.054234668612480164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,float16,0,0.20772800842920938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,128,1,fp8,fp8,0,0.05173333485921224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,0,1,fp8,fp8,0,0.19115734100341797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,0,1,fp8,fp8,0,0.1909439961115519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,float16,0,0.15146666765213013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,fp8,0,0.05409599840641022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,fp8,0,0.1546453336874644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,fp8,0,0.20769067605336508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,128,1,fp8,fp8,0,0.146096001068751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,fp8,0,0.05431999762852987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,fp8,0,0.6566186745961508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,float16,0,0.1586186687151591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,fp8,0,0.16078933080037436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,float16,0,0.6635253429412842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,float16,0,0.6564480066299438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,128,1,fp8,fp8,0,0.15397866566975912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,float16,0,0.09527466694513957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,0,1,fp8,fp8,0,0.6131680011749268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,float16,0,0.380298654238383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,128,1,fp8,fp8,0,0.0953546663125356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,fp8,0,0.3822400172551473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,0,1,fp8,fp8,0,0.3538026809692383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,float16,0,0.08724266290664673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,fp8,0,0.09708799918492635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,0,1,fp8,fp8,0,0.6039040088653564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,128,1,fp8,fp8,0,0.0817440003156662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,fp8,0,0.37396268049875897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,fp8,0,0.6663039922714233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,float16,0,0.09112000465393066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,float16,0,0.372597336769104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,fp8,0,0.08761599659919739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,128,1,fp8,fp8,0,0.08665066957473755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,fp8,0,0.3762986660003662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,0,1,fp8,fp8,0,0.34088532129923504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,0,1,fp8,fp8,0,0.34622399012247723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,float16,0,0.3737599849700928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,float16,0,0.23642667134602866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,fp8,0,0.062314664324124656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,128,1,fp8,fp8,0,0.05797866483529409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,fp8,0,0.24231467644373575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,0,1,fp8,fp8,0,0.22000000874201456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,fp8,0,0.09141866366068523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,float16,0,0.05825600028038025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,float16,0,0.23591999212900797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,128,1,fp8,fp8,0,0.05418666700522105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,fp8,0,0.23498133818308511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,0,1,fp8,fp8,0,0.21666133403778076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,float16,0,0.23682665824890137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,float16,0,0.061466669042905174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,fp8,0,0.05985066791375478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,128,1,fp8,fp8,0,0.05612266560395559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,fp8,0,0.0587360014518102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,0,1,fp8,fp8,0,0.21806933482487997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,float16,0,0.15676800409952799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,fp8,0,0.04997866849104563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,128,1,fp8,fp8,0,0.0476746658484141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,fp8,0,0.1565600037574768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,0,1,fp8,fp8,0,0.14396799604098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,fp8,0,0.2367039918899536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,float16,0,0.049957334995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,float16,0,0.05018133421738943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,float16,0,0.15678933262825012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,128,1,fp8,fp8,0,0.045824001232783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,fp8,0,0.15676800409952799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,float16,0,0.05989866455396017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,float16,0,0.04971200227737427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,float16,0,0.1567146678765615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,128,1,fp8,fp8,0,0.046896000703175865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,fp8,0,0.15618133544921875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,0,1,fp8,fp8,0,0.14320000012715658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,fp8,0,0.05027199784914652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,float16,0,0.2320693333943685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,0,1,fp8,fp8,0,0.1441973348458608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,fp8,0,0.23546665906906128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,float16,0,0.8401760260264078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,fp8,0,0.049829334020614624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,float16,0,0.25011734167734784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,0,1,fp8,fp8,0,0.7700053056081136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,fp8,0,0.2476960023244222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,128,1,fp8,fp8,0,0.23325333992640176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,128,1,fp8,fp8,0,0.21996267636617026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,fp8,0,0.8549760182698568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,0,1,fp8,fp8,0,0.7815946737925211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,float16,0,0.8485120137532552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,fp8,0,0.1464853286743164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,128,1,fp8,fp8,0,0.13428800304730734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,fp8,0,0.8411893049875895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,float16,0,0.13843199610710144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,0,1,fp8,fp8,0,0.43674135208129883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,float16,0,0.12111999591191609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,float16,0,0.4702346722284953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,fp8,0,0.12191466490427653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,128,1,fp8,fp8,0,0.11588266491889954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,fp8,0,0.4535893201828003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,0,1,fp8,fp8,0,0.4171359936396281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,float16,0,0.12600533167521158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,fp8,0,0.4724800189336141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,float16,0,0.45763734976450604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,128,1,fp8,fp8,0,0.12772799531618753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,fp8,0,0.4609440167744954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,0,1,fp8,fp8,0,0.4252479871114095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,float16,0,0.4517279863357544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,float16,0,0.269269327322642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,128,1,fp8,fp8,0,0.07899199922879536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,fp8,0,0.2732906738917033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,fp8,0,0.1281066636244456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,0,1,fp8,fp8,0,0.2529066602389018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,float16,0,0.07276266813278198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,float16,0,0.26358399788538617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,float16,0,0.07788800199826558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,128,1,fp8,fp8,0,0.06837333242098491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,fp8,0,0.2654133240381877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,0,1,fp8,fp8,0,0.24247999986012778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,float16,0,0.07460266848405202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,float16,0,0.2651413281758626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,fp8,0,0.07460799813270569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,128,1,fp8,fp8,0,0.070592001080513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,fp8,0,0.2669653296470642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,0,1,fp8,fp8,0,0.24572799603144327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,float16,0,0.04770133395989736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,fp8,0,0.0748533308506012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,fp8,0,0.04990933338801066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,128,1,fp8,fp8,0,0.048991998036702476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,fp8,0,0.17706666390101114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,fp8,0,0.07856533428033192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,float16,0,0.04786666731039683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,float16,0,0.17287466923395792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,fp8,0,0.04602666695912679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,128,1,fp8,fp8,0,0.04379733403523763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,fp8,0,0.17305066188176474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,0,1,fp8,fp8,0,0.1585493286450704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,float16,0,0.04810666541258494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,float16,0,0.17289066314697266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,float16,0,0.1750453313191732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,128,1,fp8,fp8,0,0.04580266773700714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,0,1,fp8,fp8,0,0.16272000471750894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,0,1,fp8,fp8,0,0.16058666507403055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,float16,0,0.038176000118255615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,float16,0,0.12363200386365254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,fp8,0,0.03990933299064636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,128,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,fp8,0,0.12168000141779582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,fp8,0,0.17292267084121704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,0,1,fp8,fp8,0,0.11343466242154439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,float16,0,0.03961600114901861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,float16,0,0.12167466680208842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,fp8,0,0.03942399968703588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,128,1,fp8,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,fp8,0,0.12172266840934753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,0,1,fp8,fp8,0,0.11379733681678772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,float16,0,0.037434667348861694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,float16,0,0.12351466218630473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,128,1,fp8,fp8,0,0.03787733366092046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,0,1,fp8,fp8,0,0.11339733004570007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,float16,0,0.18036266167958578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,float16,0,0.5225439866383871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,128,1,fp8,fp8,0,0.17083199818929037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,fp8,0,0.04794133206208547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,0,1,fp8,fp8,0,0.4803839921951294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,float16,0,0.1892319917678833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,fp8,0,0.1830880045890808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,float16,0,0.5315680106480917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,128,1,fp8,fp8,0,0.18125865856806436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,fp8,0,0.5250720183054606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,fp8,0,0.5335466861724854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,fp8,0,0.12316266695658366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,float16,0,0.10923199852307637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,float16,0,0.30026666323343915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,128,1,fp8,fp8,0,0.10600533088048299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,fp8,0,0.190394659837087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,fp8,0,0.30295999844868976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,0,1,fp8,fp8,0,0.2805226643880208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,float16,0,0.09762133161226909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,float16,0,0.28758933146794635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,fp8,0,0.09923199812571208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,0,1,fp8,fp8,0,0.4922933181126912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,fp8,0,0.288592000802358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,0,1,fp8,fp8,0,0.26310932636260986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,float16,0,0.09996267159779866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,float16,0,0.2896266579627991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,fp8,0,0.10227200388908386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,128,1,fp8,fp8,0,0.09724266330401103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,fp8,0,0.29309332370758057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,0,1,fp8,fp8,0,0.2699573238690694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,float16,0,0.062234664956728615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,float16,0,0.17728533347447714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,128,1,fp8,fp8,0,0.09132267038027446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,fp8,0,0.17942933241526285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,fp8,0,0.11137066284815471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,0,1,fp8,fp8,0,0.1658399999141693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,float16,0,0.1745599905649821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,fp8,0,0.05909333129723867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,128,1,fp8,fp8,0,0.05624000231424967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,fp8,0,0.1751413345336914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,128,1,fp8,fp8,0,0.060346667965253196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,float16,0,0.05863999823729197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,float16,0,0.1743733286857605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,fp8,0,0.0601440022389094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,128,1,fp8,fp8,0,0.05638933181762695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,fp8,0,0.17673067251841226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,0,1,fp8,fp8,0,0.16247999668121338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,fp8,0,0.06237333516279856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,float16,0,0.043322667479515076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,0,1,fp8,fp8,0,0.1595200002193451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,fp8,0,0.04418666660785675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,128,1,fp8,fp8,0,0.04187199970086416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,fp8,0,0.11770133177439372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,0,1,fp8,fp8,0,0.1090719997882843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,float16,0,0.041573333243529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,float16,0,0.11532800396283467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,128,1,fp8,fp8,0,0.03957333415746689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,fp8,0,0.11602133512496948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,0,1,fp8,fp8,0,0.1072213351726532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,float16,0,0.04257066547870636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,float16,0,0.11740799744923909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,fp8,0,0.043663998444875084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,128,1,fp8,fp8,0,0.041562666495641075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,fp8,0,0.11562666296958923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,0,1,fp8,fp8,0,0.1074186662832896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,float16,0,0.03367999941110611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,float16,0,0.09506133198738098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,float16,0,0.05816533168156942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,128,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,fp8,0,0.09544533491134644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,0,1,fp8,fp8,0,0.08903466661771138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,float16,0,0.03357866654793421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,float16,0,0.09543466567993164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,128,1,fp8,fp8,0,0.03258133431275686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,fp8,0,0.09502933422724406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,0,1,fp8,fp8,0,0.08864532907803853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,float16,0,0.03336533407370249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,float16,0,0.11661866307258606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,float16,0,0.09515200058619182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,128,1,fp8,fp8,0,0.03163733333349228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,fp8,0,0.03342933456103007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,0,1,fp8,fp8,0,0.08685333530108134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,float16,0,0.238864004611969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,float16,0,0.5194773276646932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,fp8,0,0.23917865753173828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,128,1,fp8,fp8,0,0.22259199619293213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,fp8,0,0.5219253301620483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,0,1,fp8,fp8,0,0.47893333435058594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,float16,0,0.25089067220687866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,fp8,0,0.09486933549245198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,float16,0,0.5356053511301676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,128,1,fp8,fp8,0,0.23643199602762857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,fp8,0,0.5357760190963745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,0,1,fp8,fp8,0,0.49032533168792725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,float16,0,0.14005333185195923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,float16,0,0.2949333389600118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,128,1,fp8,fp8,0,0.13505599896113077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,fp8,0,0.29838399092356366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,fp8,0,0.2523840069770813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,float16,0,0.1202133297920227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,float16,0,0.2757440010706584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,128,1,fp8,fp8,0,0.11752000451087952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,fp8,0,0.27637867132822674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,fp8,0,0.14088533322016397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,0,1,fp8,fp8,0,0.2574133276939392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,float16,0,0.28203733762105304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,fp8,0,0.1295146644115448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,fp8,0,0.12359467148780823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,fp8,0,0.28543466329574585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,0,1,fp8,fp8,0,0.2644960085550944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,float16,0,0.1260426640510559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,float16,0,0.16502933700879416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,fp8,0,0.07700799902280171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,128,1,fp8,fp8,0,0.07699200014273326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,128,1,fp8,fp8,0,0.12403200070063274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,fp8,0,0.1686453421910604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,0,1,fp8,fp8,0,0.15826132893562317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,float16,0,0.06864533325036366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,float16,0,0.15873066584269205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,fp8,0,0.07025066514809926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,128,1,fp8,fp8,0,0.06585066517194112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,fp8,0,0.1611733337243398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,0,1,fp8,fp8,0,0.14683733383814493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,float16,0,0.07106666763623555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,0,1,fp8,fp8,0,0.2762666742006938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,fp8,0,0.0729013333717982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,128,1,fp8,fp8,0,0.07060266534487407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,fp8,0,0.1637493371963501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,0,1,fp8,fp8,0,0.1504586637020111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,float16,0,0.045519997676213585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,float16,0,0.1072160005569458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,fp8,0,0.04585599899291992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,128,1,fp8,fp8,0,0.044069334864616394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,fp8,0,0.10758933424949646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,0,1,fp8,fp8,0,0.09915199875831604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,float16,0,0.04381866753101349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,float16,0,0.10322133700052898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,fp8,0,0.0436160018046697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,128,1,fp8,fp8,0,0.041450666884581246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,fp8,0,0.10365333159764607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,0,1,fp8,fp8,0,0.09704533219337463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,float16,0,0.04362666606903076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,float16,0,0.10542399684588115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,128,1,fp8,fp8,0,0.04375466704368591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,fp8,0,0.10629866520563762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,0,1,fp8,fp8,0,0.09699199597040813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,float16,0,0.03140799949566523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,float16,0,0.07454933226108551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,float16,0,0.16104533274968466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,128,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,fp8,0,0.07077333331108093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,0,1,fp8,fp8,0,0.0664160003264745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,fp8,0,0.04398933549722036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,float16,0,0.07067733506361644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,128,1,fp8,fp8,0,0.02985599885384242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,fp8,0,0.07098133365313213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,0,1,fp8,fp8,0,0.0647626668214798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,float16,0,0.02991466720898946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,float16,0,0.07043200234572093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,128,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,fp8,0,0.07055999835332234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,0,1,fp8,fp8,0,0.06657599906126659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,float16,0,0.02924799919128418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,float16,0,0.06898666421572368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,128,1,fp8,fp8,0,0.029152000943819683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,fp8,0,0.06861866513888042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,0,1,fp8,fp8,0,0.06436799963315327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,float16,0,0.06853866577148438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,fp8,0,0.028570666909217834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,128,1,fp8,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,fp8,0,0.06857066849867503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,0,1,fp8,fp8,0,0.06229333579540253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,float16,0,0.029685333371162415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,float16,0,0.06821866830190022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,float16,0,0.0295413335164388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,128,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,fp8,0,0.0684853345155716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,0,1,fp8,fp8,0,0.06453866759936015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,float16,0,0.07239999870459239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,float16,0,0.18291199207305908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,float16,0,0.33765331904093426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,128,1,fp8,fp8,0,0.17076265811920166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,fp8,0,0.3372533321380615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,0,1,fp8,fp8,0,0.31062932809193927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,float16,0,0.18946133057276407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,fp8,0,0.19153600931167603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,float16,0,0.34508268038431805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,128,1,fp8,fp8,0,0.18106667200724283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,fp8,0,0.34570133686065674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,float16,0,0.10924266775449117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,float16,0,0.19564799467722574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,fp8,0,0.18481600284576416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,128,1,fp8,fp8,0,0.10759466886520386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,fp8,0,0.19734932978947958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,0,1,fp8,fp8,0,0.1856000026067098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,float16,0,0.09402666489283244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,float16,0,0.18163732687632242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,fp8,0,0.09641066193580627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,128,1,fp8,fp8,0,0.08888000249862671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,fp8,0,0.181551992893219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,0,1,fp8,fp8,0,0.16760534048080444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,float16,0,0.09778666496276855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,0,1,fp8,fp8,0,0.32233599821726483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,fp8,0,0.1006666620572408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,fp8,0,0.11145066221555074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,fp8,0,0.18746666113535562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,0,1,fp8,fp8,0,0.1747466723124186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,float16,0,0.05997333427270254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,float16,0,0.11135466893513997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,fp8,0,0.06102400024731954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,128,1,fp8,fp8,0,0.05843733251094818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,fp8,0,0.11452266573905945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,0,1,fp8,fp8,0,0.10583999752998352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,float16,0,0.056176001826922096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,float16,0,0.10830932855606079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,fp8,0,0.0581279993057251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,128,1,fp8,fp8,0,0.054234668612480164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,fp8,0,0.10959466298421223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,0,1,fp8,fp8,0,0.10126399993896484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,float16,0,0.056405335664749146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,float16,0,0.10988799730936687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,fp8,0,0.058090666929880776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,128,1,fp8,fp8,0,0.05411200225353241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,fp8,0,0.11127466956774394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,0,1,fp8,fp8,0,0.10177600383758545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,float16,0,0.03980266551176707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,float16,0,0.07346666852633159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,fp8,0,0.04193066557248434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,128,1,fp8,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,fp8,0,0.0749120016892751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,0,1,fp8,fp8,0,0.07051733136177063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,float16,0,0.03998400022586187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,float16,0,0.07259733478228252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,fp8,0,0.04002666721741358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,128,1,fp8,fp8,0,0.09804266691207886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,fp8,0,0.07235733171304067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,0,1,fp8,fp8,0,0.06691200037797292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,float16,0,0.039818666875362396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,float16,0,0.07497066756089528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,fp8,0,0.04159999887148539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,128,1,fp8,fp8,0,0.038005332152048744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,fp8,0,0.07454399764537811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,0,1,fp8,fp8,0,0.06864533325036366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,float16,0,0.0583840012550354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,fp8,0,0.029802667597929638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,float16,0,0.18345600366592407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,128,1,fp8,fp8,0,0.03745600084463755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,0,1,fp8,fp8,0,0.053871999184290566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,float16,0,0.02777066578467687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,float16,0,0.05622399846712748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,fp8,0,0.027717334528764088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,float16,0,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,128,1,fp8,fp8,0,0.027621333797772724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,fp8,0,0.05629866818586985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,0,1,fp8,fp8,0,0.05213866631189982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,float16,0,0.027514666318893433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,float16,0,0.05621333420276642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,128,1,fp8,fp8,0,0.027984000742435455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,128,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,fp8,0,0.057002668579419456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,0,1,fp8,fp8,0,0.052341332038243614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,float16,0,0.02622933437426885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,float16,0,0.05580799778302511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,128,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,fp8,0,0.0562720000743866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,0,1,fp8,fp8,0,0.05212266743183136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,float16,0,0.02762666592995326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,float16,0,0.055904000997543335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,128,1,fp8,fp8,0,0.02584533393383026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,fp8,0,0.05514133473237356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,0,1,fp8,fp8,0,0.052000001072883606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,float16,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,float16,0,0.05569066603978475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,128,1,fp8,fp8,0,0.025642665723959606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,fp8,0,0.05595199763774872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,0,1,fp8,fp8,0,0.052069331208864846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,float16,0,0.24420267343521118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,float16,0,0.36295465628306073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,fp8,0,0.2457759976387024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,128,1,fp8,fp8,0,0.22524267435073853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,fp8,0,0.36177066961924237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,0,1,fp8,fp8,0,0.3319786588350932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,float16,0,0.2550719976425171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,float16,0,0.37349867820739746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,fp8,0,0.2568959991137187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,128,1,fp8,fp8,0,0.23722134033838907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,fp8,0,0.37620266278584796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,0,1,fp8,fp8,0,0.34308799107869464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,float16,0,0.14034666617711386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,fp8,0,0.14081066846847534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,128,1,fp8,fp8,0,0.1363200048605601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,fp8,0,0.056746666630109154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,0,1,fp8,fp8,0,0.19609065850575766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,float16,0,0.1209386686484019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,float16,0,0.1877280076344808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,fp8,0,0.12272533774375916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,128,1,fp8,fp8,0,0.1193386713663737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,fp8,0,0.1876586675643921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,0,1,fp8,fp8,0,0.17882132530212402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,float16,0,0.12814933061599731
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,float16,0,0.19662932554880777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,fp8,0,0.12914133071899414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,float16,0,0.2065920035044352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,fp8,0,0.19713600476582846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,0,1,fp8,fp8,0,0.1839146614074707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,float16,0,0.07673066854476929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,float16,0,0.11334932843844096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,fp8,0,0.07897066573301952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,128,1,fp8,fp8,0,0.0776800016562144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,0,1,fp8,fp8,0,0.10975466171900432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,float16,0,0.06935999790827434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,float16,0,0.10602133472760518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,128,1,fp8,fp8,0,0.12543466687202454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,fp8,0,0.07083733379840851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,128,1,fp8,fp8,0,0.06583466629187266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,fp8,0,0.10963732997576396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,0,1,fp8,fp8,0,0.10110400120417277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,float16,0,0.07226666808128357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,float16,0,0.1090613305568695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,fp8,0,0.11451199650764465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,128,1,fp8,fp8,0,0.06880000233650208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,fp8,0,0.10943466424942017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,0,1,fp8,fp8,0,0.10320533315340678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,float16,0,0.04399466514587402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,float16,0,0.07065600156784058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,fp8,0,0.04604800045490265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,128,1,fp8,fp8,0,0.04358933369318644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,fp8,0,0.07474666833877563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,0,1,fp8,fp8,0,0.06869333485762279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,float16,0,0.043525333205858864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,float16,0,0.07053333520889282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,fp8,0,0.043807998299598694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,128,1,fp8,fp8,0,0.03996799886226654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,fp8,0,0.0707946668068568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,0,1,fp8,fp8,0,0.06400000055631001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,float16,0,0.04359466830889384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,float16,0,0.07060266534487407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,fp8,0,0.04401599864164988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,128,1,fp8,fp8,0,0.0418453315893809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,fp8,0,0.07223999996980031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,0,1,fp8,fp8,0,0.06638399759928386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,float16,0,0.031231999397277832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,float16,0,0.047839999198913574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,fp8,0,0.029834667841593426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,128,1,fp8,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,fp8,0,0.0481333335240682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,0,1,fp8,fp8,0,0.04538666705290476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,fp8,0,0.07478933533032735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,128,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,fp8,0,0.046015997727712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,0,1,fp8,fp8,0,0.04353600243727366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,fp8,0,0.20751466353734335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,float16,0,0.047872001926104225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,fp8,0,0.029909332593282063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,float16,0,0.029264000554879505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,128,1,fp8,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,fp8,0,0.04789333542188009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,0,1,fp8,fp8,0,0.04585599899291992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,float16,0,0.027119999130566914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,float16,0,0.04377066592375437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,128,1,fp8,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,fp8,0,0.04359466830889384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,0,1,fp8,fp8,0,0.041840001940727234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,float16,0,0.025290665527184803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,float16,0,0.029264000554879505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,128,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,fp8,0,0.04190400242805481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,0,1,fp8,fp8,0,0.039813332259655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,float16,0,0.04375466704368591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,fp8,0,0.02554133286078771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,128,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,fp8,0,0.04368533194065094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,0,1,fp8,fp8,0,0.03984000037113825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,float16,0,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,float16,0,0.04171200096607208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,128,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,fp8,0,0.041690667470296226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,0,1,fp8,fp8,0,0.03995199998219808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,float16,0,0.024101334313551586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,float16,0,0.04144533226887385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,fp8,0,0.02380799998839696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,128,1,fp8,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,fp8,0,0.04172799984614054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,0,1,fp8,fp8,0,0.03995733211437861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,float16,0,0.041759997606277466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,128,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,fp8,0,0.04195733368396759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,0,1,fp8,fp8,0,0.03755733370780945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,float16,0,0.2002133329709371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,float16,0,0.2604373296101888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,float16,0,0.043525333205858864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,128,1,fp8,fp8,0,0.191103994846344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,fp8,0,0.26477867364883423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,0,1,fp8,fp8,0,0.24578134218851724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,float16,0,0.04599999884764353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,float16,0,0.21108800172805786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,float16,0,0.2714186708132426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,fp8,0,0.20898133516311646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,fp8,0,0.26968000332514447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,0,1,fp8,fp8,0,0.2515786687533061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,float16,0,0.11840533216794331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,float16,0,0.1530933380126953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,fp8,0,0.11752532919247945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,128,1,fp8,fp8,0,0.11780800422032674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,fp8,0,0.15100799997647604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,0,1,fp8,fp8,0,0.14813333749771118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,float16,0,0.09985599915186565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,float16,0,0.13363200426101685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,fp8,0,0.1011893351872762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,128,1,fp8,fp8,0,0.09341866771380107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,128,1,fp8,fp8,0,0.19996267557144165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,0,1,fp8,fp8,0,0.12428800264994304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,float16,0,0.10356799761454265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,float16,0,0.13913599650065103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,fp8,0,0.10566400488217671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,128,1,fp8,fp8,0,0.10260799527168274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,fp8,0,0.14008532961209616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,0,1,fp8,fp8,0,0.1334933340549469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,float16,0,0.06223999957243601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,float16,0,0.08386133114496867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,128,1,fp8,fp8,0,0.060229331254959106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,fp8,0,0.08643200000127156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,fp8,0,0.14112533132235208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,0,1,fp8,fp8,0,0.08072000245253245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,float16,0,0.06011199951171875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,float16,0,0.08188800017038982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,fp8,0,0.05930666625499725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,128,1,fp8,fp8,0,0.05611200133959452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,fp8,0,0.20083200931549072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,0,1,fp8,fp8,0,0.07483200232187907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,float16,0,0.062394668658574425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,float16,0,0.08189866443475087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,fp8,0,0.06241600215435028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,128,1,fp8,fp8,0,0.05653866628805796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,fp8,0,0.08298133313655853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,0,1,fp8,fp8,0,0.07673599819342296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,float16,0,0.0543039987484614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,fp8,0,0.04364266494909922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,128,1,fp8,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,fp8,0,0.055999999245007835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,0,1,fp8,fp8,0,0.052298665046691895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,float16,0,0.04062400013208389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,fp8,0,0.06343466540177663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,float16,0,0.054325332244237266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,fp8,0,0.04018666595220566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,128,1,fp8,fp8,0,0.03786666691303253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,fp8,0,0.053743998209635414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,0,1,fp8,fp8,0,0.048138668139775596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,float16,0,0.040847999354203544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,float16,0,0.05392000079154968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,fp8,0,0.04231466849644979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,128,1,fp8,fp8,0,0.03833066672086716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,fp8,0,0.05474133292833964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,0,1,fp8,fp8,0,0.0498933345079422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,float16,0,0.02939733366171519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,float16,0,0.039701332648595176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,128,1,fp8,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,0,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,float16,0,0.04173333446184794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,float16,0,0.03991466760635376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,fp8,0,0.028624000648657482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,128,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,fp8,0,0.081194669008255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,0,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,float16,0,0.02759466568628947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,float16,0,0.03961600114901861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,128,1,fp8,fp8,0,0.026176000634829204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,0,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,float16,0,0.02347733328739802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,float16,0,0.037050666908423104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,128,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,0,1,fp8,fp8,0,0.035386666655540466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,float16,0,0.035301332672437034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,128,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,fp8,0,0.04008533308903376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,fp8,0,0.03565333286921183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,0,1,fp8,fp8,0,0.03347733368476232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,float16,0,0.023269332945346832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,float16,0,0.03543466577927271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,128,1,fp8,fp8,0,0.023797333240509033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,0,1,fp8,fp8,0,0.034341332813103996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,float16,0,0.03528533379236857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,128,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,fp8,0,0.033674667278925575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,0,1,fp8,fp8,0,0.03239466746648153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,float16,0,0.022783999641736347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,float16,0,0.03427733232577642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,128,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,0,1,fp8,fp8,0,0.033370666205883026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,float16,0,0.023290666441122692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,float16,0,0.03526933242877325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,128,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,0,1,fp8,fp8,0,0.031701333820819855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,float16,0,0.20520534118016562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,float16,0,0.2435306708017985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,fp8,0,0.20412800709406534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,128,1,fp8,fp8,0,0.19267733891805014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,fp8,0,0.242741326491038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,0,1,fp8,fp8,0,0.2266026735305786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,float16,0,0.2091040015220642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,fp8,0,0.03711999952793121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,128,1,fp8,fp8,0,0.20401599009831747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,fp8,0,0.24658666054407755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,0,1,fp8,fp8,0,0.2366080085436503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,float16,0,0.12113066514333089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,float16,0,0.14072533448537192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,fp8,0,0.11931199828783672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,128,1,fp8,fp8,0,0.11927466591199239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,fp8,0,0.14028267065684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,0,1,fp8,fp8,0,0.13827199737230936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,float16,0,0.10979732871055603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,fp8,0,0.20779200394948324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,fp8,0,0.10980266332626343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,128,1,fp8,fp8,0,0.10319999853769939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,fp8,0,0.13218133648236594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,0,1,fp8,fp8,0,0.12180800239245097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,float16,0,0.11374933520952861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,float16,0,0.1357973317305247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,fp8,0,0.11301866173744202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,128,1,fp8,fp8,0,0.11171733339627583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,fp8,0,0.13447999954223633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,0,1,fp8,fp8,0,0.13058666388193765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,float16,0,0.2495786746342977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,float16,0,0.07948266466458638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,fp8,0,0.06657066444555919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,128,1,fp8,fp8,0,0.06516266862551372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,fp8,0,0.07891733447710673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,0,1,fp8,fp8,0,0.07478933533032735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,float16,0,0.06339733302593231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,float16,0,0.07554133236408234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,fp8,0,0.06461866696675618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,128,1,fp8,fp8,0,0.05923733115196228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,fp8,0,0.07672533392906189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,0,1,fp8,fp8,0,0.07374933362007141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,float16,0,0.06584000090758006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,float16,0,0.07681599756081899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,fp8,0,0.06472533444563548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,128,1,fp8,fp8,0,0.059861332178115845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,fp8,0,0.07660800218582153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,float16,0,0.13288000226020813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,float16,0,0.04191466669241587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,float16,0,0.05187733471393585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,fp8,0,0.04205333193143209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,128,1,fp8,fp8,0,0.040922666589419045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,fp8,0,0.052298665046691895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,0,1,fp8,fp8,0,0.0499893327554067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,float16,0,0.041696002086003624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,float16,0,0.06633066634337108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,fp8,0,0.03994133323431015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,0,1,fp8,fp8,0,0.07044266661008199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,128,1,fp8,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,fp8,0,0.050399998823801674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,0,1,fp8,fp8,0,0.048026666045188904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,float16,0,0.041706666350364685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,float16,0,0.052095999320348106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,fp8,0,0.04168533285458883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,128,1,fp8,fp8,0,0.040048000713189445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,fp8,0,0.05126399795214335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,0,1,fp8,fp8,0,0.0483893354733785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,float16,0,0.033530667424201965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,128,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,0,1,fp8,fp8,0,0.033733333150545754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,float16,0,0.025248001019159954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,float16,0,0.03382933388153712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,fp8,0,0.02698666602373123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,128,1,fp8,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,0,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,float16,0,0.027424000203609467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,float16,0,0.033370666205883026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,128,1,fp8,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,fp8,0,0.03325333446264267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,0,1,fp8,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,float16,0,0.023397333920001984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,128,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,0,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,float16,0,0.02348266790310542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,float16,0,0.02829866607983907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,128,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,0,1,fp8,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,float16,0,0.022986667851607006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,128,1,fp8,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,0,1,fp8,fp8,0,0.0278613343834877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,float16,0,0.027434666951497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,128,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,float16,0,0.027562665442625683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,128,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,0,1,fp8,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,0,1,fp8,fp8,0,0.025727999707063038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,float16,0,0.027301333844661713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,128,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,0,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,float16,0,0.01971199984351794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,float16,0,0.027445333699385326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,128,1,fp8,fp8,0,0.02067733307679494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,float16,0,0.026799999177455902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,128,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,0,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,128,1,float16,float16,0,0.20001065731048584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,0,1,float16,float16,0,0.20229866107304892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,float16,0,0.05142933130264282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,128,1,float16,fp8,0,0.20030399163564047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,128,1,fp8,fp8,0,0.1877653400103251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,0,1,float16,fp8,0,0.2012959917386373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,0,1,fp8,fp8,0,0.19009600083033243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,128,1,float16,float16,0,0.2030186653137207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,fp8,0,0.034101332227389015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,128,1,float16,fp8,0,0.2034133275349935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,128,1,fp8,fp8,0,0.19411200284957886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,128,1,float16,float16,0,0.11640533804893494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,0,1,float16,float16,0,0.20651199420293173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,0,1,float16,float16,0,0.11958400408426921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,0,1,fp8,fp8,0,0.1982240080833435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,128,1,float16,fp8,0,0.11600533127784729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,128,1,fp8,fp8,0,0.11353600025177002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,0,1,float16,fp8,0,0.11731732885042827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,0,1,fp8,fp8,0,0.11501333117485046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,128,1,float16,float16,0,0.10804800192515056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,0,1,float16,float16,0,0.10962667067845662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,128,1,float16,fp8,0,0.10763200124104817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,128,1,fp8,fp8,0,0.09975999593734741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,0,1,float16,fp8,0,0.11269332965215047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,0,1,fp8,fp8,0,0.10155733426411946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,128,1,float16,float16,0,0.1113759974638621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,0,1,float16,fp8,0,0.2043573260307312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,128,1,float16,fp8,0,0.10960533221562703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,128,1,fp8,fp8,0,0.10773866375287373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,0,1,fp8,fp8,0,0.10930666327476501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,0,1,float16,fp8,0,0.11181333661079407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,128,1,float16,float16,0,0.06497600177923839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,128,1,float16,fp8,0,0.06347733239332835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,128,1,fp8,fp8,0,0.06461866696675618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,0,1,float16,fp8,0,0.0633493314186732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,0,1,fp8,fp8,0,0.06233066817124685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,128,1,float16,float16,0,0.062362665931383766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,0,1,float16,float16,0,0.06274666885534923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,128,1,float16,fp8,0,0.06177066763242086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,128,1,fp8,fp8,0,0.057477335135142006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,0,1,float16,float16,0,0.11347732941309611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,0,1,fp8,fp8,0,0.058378666639328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,128,1,float16,float16,0,0.06268266836802165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,0,1,float16,float16,0,0.06257066627343495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,128,1,float16,fp8,0,0.06250133117039998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,128,1,fp8,fp8,0,0.058431997895240784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,0,1,float16,fp8,0,0.06217066446940104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,0,1,fp8,fp8,0,0.059215997656186424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,128,1,float16,float16,0,0.0415040006240209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,0,1,float16,float16,0,0.041877334316571556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,128,1,float16,fp8,0,0.0417546679576238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,128,1,fp8,fp8,0,0.03955200066169103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,0,1,float16,fp8,0,0.04372266431649526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,0,1,fp8,fp8,0,0.04190933207670847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,128,1,float16,float16,0,0.039706667264302574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,0,1,float16,float16,0,0.04188266893227895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,128,1,float16,fp8,0,0.03984000037113825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,128,1,fp8,fp8,0,0.03955733279387156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,0,1,float16,fp8,0,0.04195733368396759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,0,1,fp8,fp8,0,0.0397173340121905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,128,1,float16,float16,0,0.0401706670721372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,0,1,float16,float16,0,0.041536000867684685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,0,1,float16,float16,0,0.06632533172766368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,128,1,fp8,fp8,0,0.037802666425704956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,0,1,float16,fp8,0,0.04201599955558777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,0,1,fp8,fp8,0,0.0395413339138031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,128,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,0,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,128,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,128,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,0,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,0,1,fp8,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,128,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,0,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,128,1,float16,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,128,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,0,1,float16,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,128,1,float16,float16,0,0.027615999182065327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,0,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,128,1,float16,fp8,0,0.0278613343834877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,128,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,0,1,float16,fp8,0,0.029125332832336426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,0,1,fp8,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,128,1,float16,float16,0,0.02351466566324234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,128,1,float16,fp8,0,0.02367466688156128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,128,1,fp8,fp8,0,0.021536000072956085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,0,1,float16,fp8,0,0.023562667270501454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,0,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,128,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,128,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,128,1,fp8,fp8,0,0.023584000766277313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,128,1,float16,float16,0,0.02346133440732956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,0,1,float16,float16,0,0.0235359991590182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,128,1,float16,fp8,0,0.02359466751416524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,128,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,0,1,float16,fp8,0,0.024495999018351238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,0,1,fp8,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,128,1,float16,float16,0,0.021498667697111767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,0,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,128,1,float16,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,128,1,fp8,fp8,0,0.020021333048741024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,0,1,float16,fp8,0,0.022218666970729828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,0,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,128,1,float16,float16,0,0.021456000705560047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,128,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,128,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,0,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,0,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,128,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,0,1,float16,float16,0,0.022682666778564453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,128,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,128,1,float16,fp8,0,0.022416000564893086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,128,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,128,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,0,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,128,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,128,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,0,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,0,1,float16,float16,0,0.021781332790851593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,128,1,float16,fp8,0,0.020810666183630627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,128,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,0,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,128,1,float16,float16,0,0.022826666633288067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,128,1,float16,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,128,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,0,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,0,1,fp8,fp8,0,0.01988799994190534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,128,1,float16,float16,0,0.01966399947802226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,128,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,128,1,fp8,fp8,0,0.020586666961510975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,0,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,0,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,0,1,float16,fp8,0,0.06241066753864288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,0,1,float16,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,0,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,128,1,float16,fp8,0,0.02077866718173027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,128,1,float16,float16,0,0.0995146632194519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,0,1,float16,float16,0,0.09911466638247173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,0,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,128,1,fp8,fp8,0,0.09286399682362874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,0,1,float16,fp8,0,0.0979360044002533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,0,1,fp8,fp8,0,0.09053867061932881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,128,1,float16,float16,0,0.10333866874376933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,0,1,float16,float16,0,0.10132799545923869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,128,1,float16,fp8,0,0.09963732957839966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,128,1,fp8,fp8,0,0.09973866740862529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,0,1,float16,fp8,0,0.09919466574986775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,0,1,fp8,fp8,0,0.09779733419418335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,0,1,float16,float16,0,0.05868266522884369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,128,1,float16,fp8,0,0.06046933432420095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,128,1,fp8,fp8,0,0.06031466523806254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,0,1,float16,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,0,1,fp8,fp8,0,0.05810666580994924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,128,1,float16,float16,0,0.057775999108950295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,128,1,float16,float16,0,0.059989333152770996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,0,1,float16,float16,0,0.05622933308283488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,128,1,float16,fp8,0,0.05786666770776113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,128,1,fp8,fp8,0,0.05454933146635691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,0,1,float16,fp8,0,0.05619200070699056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,0,1,fp8,fp8,0,0.05425066749254862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,128,1,float16,float16,0,0.058335999647776283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,0,1,float16,float16,0,0.05779733260472616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,128,1,float16,float16,0,0.019626667102177937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,128,1,fp8,fp8,0,0.0566293348868688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,0,1,float16,fp8,0,0.056549335519472756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,128,1,float16,float16,0,0.03984000037113825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,0,1,float16,float16,0,0.03839999934037527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,128,1,float16,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,128,1,fp8,fp8,0,0.03804266701141993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,0,1,float16,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,128,1,float16,fp8,0,0.09947199622790019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,0,1,fp8,fp8,0,0.03684266656637192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,128,1,float16,float16,0,0.037802666425704956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,0,1,float16,float16,0,0.03563733398914337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,128,1,float16,fp8,0,0.035749333600203194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,128,1,fp8,fp8,0,0.03581333408753077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,0,1,fp8,fp8,0,0.03589866558710734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,128,1,float16,float16,0,0.038176000118255615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,0,1,float16,float16,0,0.03774400055408478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,128,1,float16,fp8,0,0.037834666669368744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,128,1,float16,fp8,0,0.05819199979305267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,0,1,float16,fp8,0,0.03588266670703888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,0,1,fp8,fp8,0,0.05409066875775655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,0,1,fp8,fp8,0,0.035258665680885315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,0,1,float16,float16,0,0.025968000292778015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,128,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,128,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,0,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,0,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,128,1,float16,float16,0,0.025648000339667004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,0,1,float16,float16,0,0.023423999547958374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,128,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,128,1,fp8,fp8,0,0.025008000433444977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,0,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,0,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,128,1,float16,float16,0,0.025829332570234936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,0,1,float16,float16,0,0.025663999219735462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,0,1,float16,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,128,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,128,1,fp8,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,0,1,float16,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,0,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,128,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,128,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,128,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,0,1,fp8,fp8,0,0.020874666670958202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,128,1,float16,float16,0,0.02094399929046631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,128,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,128,1,fp8,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,128,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,128,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,0,1,float16,fp8,0,0.02162666618824005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,0,1,fp8,fp8,0,0.019658666104078293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,128,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,128,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,0,1,float16,fp8,0,0.0200853335360686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,128,1,fp8,fp8,0,0.03590933233499527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,128,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,128,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,0,1,float16,float16,0,0.018874666343132656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,0,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,128,1,fp8,fp8,0,0.019695999721686046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,128,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,0,1,float16,fp8,0,0.018719999740521114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,128,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,0,1,float16,fp8,0,0.018640000373125076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,0,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,128,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,0,1,float16,float16,0,0.018757333358128864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,128,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,128,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,128,1,float16,float16,0,0.01860800012946129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,128,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,128,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,128,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,128,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,0,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,128,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,128,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,128,1,fp8,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,0,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,128,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,0,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,0,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,128,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,128,1,float16,fp8,0,0.018794666975736618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,128,1,float16,float16,0,0.018863999595244724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,128,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,128,1,float16,float16,0,0.058037335673967995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,0,1,float16,float16,0,0.05849599838256836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,128,1,float16,fp8,0,0.058287998040517174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,128,1,fp8,fp8,0,0.05417066812515259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,0,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,0,1,fp8,fp8,0,0.05446400245030721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,128,1,float16,float16,0,0.058373332023620605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,0,1,float16,float16,0,0.06028266747792562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,128,1,float16,fp8,0,0.05835199852784475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,128,1,fp8,fp8,0,0.056426664193471275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,0,1,float16,fp8,0,0.059157331784566246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,0,1,fp8,fp8,0,0.05635733405749003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,0,1,float16,float16,0,0.039642666776975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,128,1,float16,fp8,0,0.039690665900707245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,128,1,fp8,fp8,0,0.037871999045213066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,0,1,float16,fp8,0,0.03994666785001755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,0,1,fp8,fp8,0,0.03939733405907949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,0,1,float16,float16,0,0.03762666632731756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,0,1,float16,float16,0,0.0180479995906353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,128,1,float16,fp8,0,0.03789866715669632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,128,1,fp8,fp8,0,0.0354666660229365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,0,1,float16,fp8,0,0.04020266731580099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,0,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,128,1,float16,float16,0,0.038191998998324074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,0,1,float16,float16,0,0.03965333352486292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,128,1,float16,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,128,1,fp8,fp8,0,0.035717333356539406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,0,1,fp8,fp8,0,0.03618666778008143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,128,1,float16,float16,0,0.02595199892918269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,128,1,float16,float16,0,0.03984533250331879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,128,1,float16,fp8,0,0.027098665634791057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,128,1,fp8,fp8,0,0.026181332767009735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,0,1,float16,fp8,0,0.02587733417749405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,0,1,float16,fp8,0,0.03950933367013931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,128,1,float16,float16,0,0.02629866699377696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,128,1,float16,float16,0,0.03833066672086716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,0,1,float16,float16,0,0.0262719988822937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,128,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,0,1,float16,fp8,0,0.027034667630990345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,0,1,fp8,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,128,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,0,1,float16,float16,0,0.025578667720158894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,128,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,128,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,0,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,0,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,128,1,float16,float16,0,0.019610666980346043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,128,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,0,1,fp8,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,0,1,float16,float16,0,0.02645866572856903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,128,1,float16,float16,0,0.019765333582957584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,128,1,fp8,fp8,0,0.018687999496857326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,128,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,0,1,float16,float16,0,0.01884799947341283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,128,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,0,1,float16,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,0,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,128,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,0,1,float16,float16,0,0.017797333498795826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,128,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,128,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,128,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,128,1,float16,float16,0,0.0161920003592968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,0,1,float16,float16,0,0.01602666700879733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,128,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,0,1,float16,fp8,0,0.01777600000301997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,128,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,128,1,float16,fp8,0,0.015925332903862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,128,1,float16,float16,0,0.016458666572968166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,128,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,128,1,fp8,fp8,0,0.01562133307258288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,0,1,fp8,fp8,0,0.015749332805474598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,128,1,float16,float16,0,0.01748266691962878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,0,1,float16,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,128,1,fp8,fp8,0,0.016565332810084026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,0,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,128,1,float16,float16,0,0.015728000551462173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,0,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,128,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,128,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,128,1,float16,float16,0,0.04614399870236715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,0,1,float16,float16,0,0.04595733185609182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,128,1,float16,fp8,0,0.04782933493455251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,128,1,fp8,fp8,0,0.04410133262475332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,0,1,float16,fp8,0,0.04596266647179922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,0,1,fp8,fp8,0,0.04377600053946177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,128,1,float16,float16,0,0.047781333327293396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,0,1,float16,float16,0,0.04795200129350027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,128,1,float16,fp8,0,0.045882667104403176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,0,1,float16,fp8,0,0.046367997924486794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,128,1,float16,float16,0,0.03150933235883713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,0,1,float16,float16,0,0.03166399896144867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,128,1,float16,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,128,1,fp8,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,0,1,float16,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,0,1,fp8,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,128,1,float16,float16,0,0.029706666866938274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,0,1,float16,float16,0,0.03169066707293192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,128,1,float16,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,128,1,fp8,fp8,0,0.029663999875386555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,0,1,float16,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,0,1,fp8,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,128,1,float16,float16,0,0.029391999046007793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,0,1,fp8,fp8,0,0.04426133135954539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,128,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,128,1,fp8,fp8,0,0.02958400050799052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,0,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,0,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,128,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,128,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,0,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,0,1,float16,float16,0,0.03133333226044973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,0,1,fp8,fp8,0,0.022085333863894146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,128,1,float16,float16,0,0.020970667401949566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,128,1,float16,fp8,0,0.02163733293612798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,128,1,fp8,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,0,1,float16,fp8,0,0.021770666042963665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,128,1,float16,float16,0,0.02149333308140437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,128,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,128,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,128,1,fp8,fp8,0,0.04389866689840952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,0,1,fp8,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,0,1,float16,float16,0,0.016757333030303318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,0,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,0,1,float16,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,128,1,float16,float16,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,128,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,0,1,float16,fp8,0,0.02180800090233485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,0,1,float16,fp8,0,0.0176959993938605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,128,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,128,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,128,1,float16,float16,0,0.016762666404247284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,128,1,float16,fp8,0,0.01653333380818367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,128,1,float16,fp8,0,0.015743999431530636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,0,1,float16,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,0,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,128,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,0,1,float16,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,128,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,0,1,float16,float16,0,0.017498667041460674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,128,1,float16,float16,0,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,128,1,float16,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,0,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,128,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,128,1,float16,float16,0,0.016751999656359356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,128,1,float16,float16,0,0.01647466669480006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,128,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,128,1,float16,float16,0,0.01570133368174235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,128,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,0,1,float16,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,0,1,fp8,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,128,1,float16,float16,0,0.03856533269087473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,0,1,float16,float16,0,0.03995199998219808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,128,1,float16,fp8,0,0.03828799972931544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,0,1,float16,fp8,0,0.039733332892258964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,0,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,128,1,float16,float16,0,0.039642666776975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,0,1,float16,float16,0,0.03962666789690653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,128,1,float16,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,128,1,fp8,fp8,0,0.0374293327331543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,0,1,float16,fp8,0,0.03959999978542328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,128,1,float16,float16,0,0.02759466568628947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,0,1,fp8,fp8,0,0.036133334040641785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,128,1,float16,fp8,0,0.02699733277161916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,128,1,fp8,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,0,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,0,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,0,1,float16,float16,0,0.026975999275843304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,128,1,float16,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,128,1,fp8,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,0,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,0,1,fp8,fp8,0,0.026127999027570088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,0,1,float16,float16,0,0.02762666592995326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,128,1,fp8,fp8,0,0.02624000112215678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,0,1,float16,float16,0,0.026565333207448322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,0,1,fp8,fp8,0,0.025850666066010792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,128,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,128,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,0,1,float16,fp8,0,0.01960533360640208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,128,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,0,1,float16,float16,0,0.019493332753578823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,128,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,128,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,0,1,fp8,fp8,0,0.020282667130231857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,128,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,0,1,float16,fp8,0,0.027589333554108936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,0,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,128,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,128,1,fp8,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,128,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,0,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,128,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,128,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,128,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,0,1,float16,float16,0,0.01629866659641266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,128,1,fp8,fp8,0,0.016016000260909397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,0,1,float16,fp8,0,0.01621866722901662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,128,1,fp8,fp8,0,0.0358240008354187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,0,1,float16,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,0,1,fp8,fp8,0,0.01580799991885821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,128,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,0,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,128,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,128,1,float16,float16,0,0.016303999970356624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,0,1,float16,float16,0,0.01588800052801768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,0,1,float16,fp8,0,0.01637866720557213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,128,1,float16,float16,0,0.015856000284353893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,0,1,float16,float16,0,0.01670933390657107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,0,1,fp8,fp8,0,0.01600533351302147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,0,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,128,1,float16,fp8,0,0.016352000335852306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,128,1,fp8,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,128,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,128,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,128,1,float16,float16,0,0.015696000307798386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,128,1,float16,fp8,0,0.015909332782030106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,128,1,fp8,fp8,0,0.015882667154073715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,0,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,128,1,fp8,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,0,1,float16,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,128,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,0,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,128,1,float16,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,128,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,128,1,float16,float16,0,0.035045333206653595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,0,1,float16,float16,0,0.03562133262554804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,128,1,float16,fp8,0,0.03576533248027166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,128,1,fp8,fp8,0,0.033946665624777474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,0,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,0,1,fp8,fp8,0,0.03384533276160558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,128,1,float16,float16,0,0.035589332381884255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,0,1,float16,float16,0,0.035402665535608925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,128,1,fp8,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,0,1,float16,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,0,1,fp8,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,128,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,0,1,float16,float16,0,0.02510933329661687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,128,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,128,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,0,1,float16,fp8,0,0.025648000339667004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,128,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,0,1,float16,float16,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,128,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,128,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,0,1,float16,fp8,0,0.026709333062171936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,128,1,fp8,fp8,0,0.033333333830038704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,128,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,0,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,128,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,128,1,fp8,fp8,0,0.02367466688156128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,128,1,float16,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,0,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,128,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,128,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,128,1,fp8,fp8,0,0.018320000420014065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,0,1,float16,fp8,0,0.020175999651352566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,128,1,float16,float16,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,128,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,128,1,float16,fp8,0,0.0164533331990242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,128,1,float16,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,0,1,float16,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,128,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,128,1,float16,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,128,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,0,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,128,1,float16,float16,0,0.015573333948850632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,0,1,float16,float16,0,0.01568000018596649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,128,1,fp8,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,0,1,float16,float16,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,128,1,float16,fp8,0,0.016480000068744022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,128,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,128,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,0,1,fp8,fp8,0,0.01618133361140887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,128,1,float16,fp8,0,0.016186666985352833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,128,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,128,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,128,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,0,1,float16,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,128,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,128,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,128,1,fp8,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,0,1,float16,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,128,1,fp8,fp8,0,0.016293333222468693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,128,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,128,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,128,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,0,1,float16,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,0,1,fp8,fp8,0,0.01575999955336253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,128,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,0,1,float16,fp8,0,0.01637866720557213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,128,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,0,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,128,1,float16,float16,0,0.015722667177518208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,0,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,0,0.033471999069054924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,0,0.033573334415753685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,0,0.033376000821590424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,128,1,fp8,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,0,0.03385066737731298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,64,0,1,fp8,fp8,0,0.03129599988460541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,0,0.03166399896144867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,0,0.03236266722281774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,0,0.03313066562016805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,128,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,0,0.03320533285538355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,64,0,1,fp8,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,128,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,0,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,0,0.025311999022960663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,128,1,fp8,fp8,0,0.022944000860055287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,64,0,1,fp8,fp8,0,0.022954667607943218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,0,0.02532266577084859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,0,0.025648000339667004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,128,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,64,0,1,fp8,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,128,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,0,0.01871466636657715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,64,128,1,float16,float16,0,0.02447466552257538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,64,128,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,0,1,fp8,fp8,0,0.019573333362738293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,128,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,0,0.01565333331624667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,128,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,0,0.01664000004529953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,128,1,fp8,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,0,0.01598400001724561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,64,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,128,1,fp8,fp8,0,0.01565333331624667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,0,1,fp8,fp8,0,0.015791999797026317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,0,0.015882667154073715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,0,0.015728000551462173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,0,0.016565332810084026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,128,1,fp8,fp8,0,0.01571200042963028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,64,0,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,128,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,128,1,fp8,fp8,0,0.01617066686352094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,0,0.01659199967980385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,0,0.015418666104475657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,0,0.015493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,128,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,64,0,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,128,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,64,0,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,0,0.015664000064134598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,128,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,128,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,128,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,64,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,float16,0,0.12752532958984375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,fp8,0,0.1300373375415802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,float16,0,0.8064266840616862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,fp8,0,0.807861328125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,float16,0,0.07867200175921123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,float16,0,0.47233064969380695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,fp8,0,0.0827946662902832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,128,1,fp8,fp8,0,0.12404800454775493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,fp8,0,0.4740693171819051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,0,1,fp8,fp8,0,0.4365066687266032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,0,1,fp8,fp8,0,0.7432906627655029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,float16,0,0.07241599758466084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,fp8,0,0.0766293356815974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,128,1,fp8,fp8,0,0.07257600128650665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,fp8,0,0.46855465571085614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,0,1,fp8,fp8,0,0.4328586657842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,128,1,fp8,fp8,0,0.08076266447703044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,float16,0,0.3030880093574524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,fp8,0,0.05184000233809153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,128,1,fp8,fp8,0,0.049957334995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,float16,0,0.4692853291829427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,fp8,0,0.30238399902979535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,0,1,fp8,fp8,0,0.27960000435511273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,float16,0,0.049770668148994446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,fp8,0,0.0497920016447703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,128,1,fp8,fp8,0,0.047770669062932335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,fp8,0,0.30052266518274945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,float16,0,0.04783466458320618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,0,1,fp8,fp8,0,0.27561066548029584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,float16,0,0.03958933303753535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,float16,0,0.3020000060399373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,float16,0,0.2100693384806315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,fp8,0,0.03976533313592275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,128,1,fp8,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,fp8,0,0.21000534296035767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,0,1,fp8,fp8,0,0.1954560081164042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,float16,0,0.21006399393081665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,128,1,fp8,fp8,0,0.039850667119026184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,fp8,0,0.21053866545359293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,0,1,fp8,fp8,0,0.19320533672968546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,float16,0,0.10048000017801921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,fp8,0,0.10269866387049358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,128,1,fp8,fp8,0,0.09722666939099629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,float16,0,0.03956799954175949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,fp8,0,0.039919999738534294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,0,1,fp8,fp8,0,0.45981331666310626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,float16,0,0.06196799874305725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,fp8,0,0.0643039991458257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,128,1,fp8,fp8,0,0.06217066446940104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,float16,0,0.4969013532002767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,fp8,0,0.30405332644780475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,0,1,fp8,fp8,0,0.27827733755111694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,float16,0,0.059530665477116905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,float16,0,0.2979360024134318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,128,1,fp8,fp8,0,0.05819199979305267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,float16,0,0.30211732784907025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,0,1,fp8,fp8,0,0.2755413254102071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,float16,0,0.04394666850566864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,float16,0,0.19735999902089438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,fp8,0,0.4993600050608317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,128,1,fp8,fp8,0,0.04377066592375437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,fp8,0,0.19827200969060263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,0,1,fp8,fp8,0,0.18117332458496094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,float16,0,0.04409599800904592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,float16,0,0.19543466965357462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,fp8,0,0.04598399996757507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,128,1,fp8,fp8,0,0.04148799926042557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,fp8,0,0.1975253423055013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,0,1,fp8,fp8,0,0.17907732725143433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,float16,0,0.03326933334271113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,float16,0,0.16056000192960104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,fp8,0,0.03530666728814443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,fp8,0,0.06148266792297363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,128,1,fp8,fp8,0,0.03365866591533025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,fp8,0,0.16058666507403055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,0,1,fp8,fp8,0,0.14843733112017313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,float16,0,0.03388266762097677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,float16,0,0.1607039968172709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,128,1,fp8,fp8,0,0.033359999457995095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,fp8,0,0.1607253352801005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,0,1,fp8,fp8,0,0.1482186714808146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,float16,0,0.0876639982064565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,fp8,0,0.08932266632715861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,fp8,0,0.04582933088143667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,128,1,fp8,fp8,0,0.08488000432650249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,fp8,0,0.374725341796875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,fp8,0,0.308463990688324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,float16,0,0.058304001887639366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,float16,0,0.2347093423207601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,128,1,fp8,fp8,0,0.05397333204746246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,fp8,0,0.2448213299115499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,float16,0,0.3717120091120402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,0,1,fp8,fp8,0,0.216154674688975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,float16,0,0.054058666030565895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,float16,0,0.23313599824905396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,fp8,0,0.056074668963750206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,0,1,fp8,fp8,0,0.3431520064671834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,128,1,fp8,fp8,0,0.05394133428732554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,fp8,0,0.23301867643992105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,0,1,fp8,fp8,0,0.21394133567810059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,float16,0,0.14738133549690247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,fp8,0,0.04233600199222565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,128,1,fp8,fp8,0,0.03962666789690653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,fp8,0,0.14856533209482828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,0,1,fp8,fp8,0,0.136272003253301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,float16,0,0.04062400013208389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,float16,0,0.14752533038457236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,128,1,fp8,fp8,0,0.03871466716130575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,fp8,0,0.15296533703804016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,float16,0,0.04151466737190882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,float16,0,0.0312266672650973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,float16,0,0.1362879971663157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,fp8,0,0.039877332746982574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,fp8,0,0.13618666927019754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,0,1,fp8,fp8,0,0.12598933776219687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,fp8,0,0.06025066475073496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,128,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,float16,0,0.1381066640218099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,0,1,fp8,fp8,0,0.135861337184906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,fp8,0,0.13619732856750488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,0,1,fp8,fp8,0,0.12573333581288657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,float16,0,0.12591466307640076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,float16,0,0.45816532770792645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,fp8,0,0.12811199824015299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,128,1,fp8,fp8,0,0.12377066413561504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,fp8,0,0.46270398298899335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,float16,0,0.07690666615962982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,float16,0,0.2681279977162679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,fp8,0,0.07824000219504039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,float16,0,0.031557333966096245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,128,1,fp8,fp8,0,0.07685866455237071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,fp8,0,0.27195199330647785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,0,1,fp8,fp8,0,0.25062400102615356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,float16,0,0.07226133346557617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,0,1,fp8,fp8,0,0.4249866803487142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,float16,0,0.2654293378194173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,fp8,0,0.07439466814200084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,128,1,fp8,fp8,0,0.06836799780527751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,fp8,0,0.26739199956258136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,0,1,fp8,fp8,0,0.24465600649515787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,float16,0,0.17283199230829874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,128,1,fp8,fp8,0,0.04595200220743815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,fp8,0,0.1750826636950175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,0,1,fp8,fp8,0,0.16059199968973795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,float16,0,0.045893331368764244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,float16,0,0.17166399955749512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,float16,0,0.04561600089073181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,fp8,0,0.04613866905371348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,128,1,fp8,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,0,1,fp8,fp8,0,0.1564906636873881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,float16,0,0.03176533430814743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,float16,0,0.114656001329422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,128,1,fp8,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,128,1,fp8,fp8,0,0.04171733558177948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,fp8,0,0.17125332355499268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,float16,0,0.03148266673088074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,float16,0,0.11537599563598633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,fp8,0,0.03156266609827677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,128,1,fp8,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,fp8,0,0.11531733473141988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,0,1,fp8,fp8,0,0.10678399602572124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,float16,0,0.029898665845394135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,float16,0,0.1126026709874471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,fp8,0,0.0296426663796107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,128,1,fp8,fp8,0,0.029215998947620392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,fp8,0,0.11190932989120483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,0,1,fp8,fp8,0,0.10348266363143921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,float16,0,0.029264000554879505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,float16,0,0.11153599619865417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,fp8,0,0.02972800036271413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,128,1,fp8,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,fp8,0,0.11338133613268535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,0,1,fp8,fp8,0,0.10528000195821126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,float16,0,0.10085333387056987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,fp8,0,0.1163200040658315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,float16,0,0.2898293336232503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,0,1,fp8,fp8,0,0.1074079970518748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,128,1,fp8,fp8,0,0.09778133034706116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,fp8,0,0.29358933369318646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,0,1,fp8,fp8,0,0.26956266164779663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,float16,0,0.05958933134873708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,float16,0,0.1759786605834961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,fp8,0,0.06108266611893972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,128,1,fp8,fp8,0,0.06028266747792562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,fp8,0,0.17733865976333618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,fp8,0,0.045834665497144066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,float16,0,0.056613331039746605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,float16,0,0.1734453241030375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,fp8,0,0.060047999024391174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,fp8,0,0.10335466265678406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,fp8,0,0.1749653418858846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,0,1,fp8,fp8,0,0.15896532932917276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,float16,0,0.03966933240493139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,float16,0,0.11552000045776367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,128,1,fp8,fp8,0,0.04062400013208389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,fp8,0,0.11640000343322754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,0,1,fp8,fp8,0,0.16608533263206482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,float16,0,0.03993066648642222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,float16,0,0.11437333623568217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,fp8,0,0.04190400242805481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,128,1,fp8,fp8,0,0.039349332451820374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,fp8,0,0.04192000130812327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,fp8,0,0.11551466584205627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,0,1,fp8,fp8,0,0.10531733433405559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,float16,0,0.02775466690460841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,float16,0,0.09099733829498291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,0,1,fp8,fp8,0,0.1074186662832896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,fp8,0,0.09084266424179077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,0,1,fp8,fp8,0,0.08349866668383281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,float16,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,float16,0,0.08897067109743755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,fp8,0,0.02940800040960312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,128,1,fp8,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,fp8,0,0.09089600046475728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,0,1,fp8,fp8,0,0.08274133503437042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,float16,0,0.088837335507075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,128,1,fp8,fp8,0,0.027104000250498455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,fp8,0,0.08899733424186707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,0,1,fp8,fp8,0,0.08109866579373677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,float16,0,0.0870293378829956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,128,1,fp8,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,128,1,fp8,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,fp8,0,0.08900266885757446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,0,1,fp8,fp8,0,0.08285333216190338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,float16,0,0.13110933701197305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,float16,0,0.2869173288345337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,fp8,0,0.13247999548912048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,128,1,fp8,fp8,0,0.12451733152071635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,fp8,0,0.2879520058631897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,float16,0,0.07715199887752533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,0,1,fp8,fp8,0,0.265994668006897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,float16,0,0.16607466340065002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,128,1,fp8,fp8,0,0.07656533519426982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,fp8,0,0.1688106656074524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,0,1,fp8,fp8,0,0.15956800182660422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,float16,0,0.07257066667079926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,float16,0,0.162800004084905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,128,1,fp8,fp8,0,0.07062399884064992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,fp8,0,0.16477866967519125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,0,1,fp8,fp8,0,0.15019733707110086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,fp8,0,0.07852266728878021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,float16,0,0.1053013304869334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,fp8,0,0.04610666632652283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,128,1,fp8,fp8,0,0.0436160018046697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,fp8,0,0.10730666915575664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,0,1,fp8,fp8,0,0.09942932923634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,float16,0,0.04385599990685781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,float16,0,0.10532266894976298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,float16,0,0.04584533472855886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,fp8,0,0.045642669002215065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,128,1,fp8,fp8,0,0.04362666606903076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,fp8,0,0.10526933272679646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,0,1,fp8,fp8,0,0.09696533282597859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,float16,0,0.029285334050655365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,float16,0,0.07175466914971669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,128,1,fp8,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,fp8,0,0.07014399766921997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,0,1,fp8,fp8,0,0.06579199930032094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,float16,0,0.03128000100453695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,float16,0,0.07030400137106578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,fp8,0,0.029637334247430164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,128,1,fp8,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,fp8,0,0.06981333096822102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,0,1,fp8,fp8,0,0.06417066852251689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,float16,0,0.06628266473611195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,128,1,fp8,fp8,0,0.025642665723959606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,fp8,0,0.06665599842866261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,fp8,0,0.07418666779994965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,float16,0,0.025839999318122864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,float16,0,0.0650186687707901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,128,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,fp8,0,0.06540266672770183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,0,1,fp8,fp8,0,0.061994666854540505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,float16,0,0.0643039991458257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,fp8,0,0.023770667612552643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,128,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,fp8,0,0.06458133459091187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,fp8,0,0.025802666942278545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,0,1,fp8,fp8,0,0.06046399970849355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,float16,0,0.06440000236034393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,128,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,fp8,0,0.06398933132489522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,0,1,fp8,fp8,0,0.058037335673967995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,float16,0,0.10633599758148193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,float16,0,0.19215466578801474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,fp8,0,0.10756267110506694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,128,1,fp8,fp8,0,0.10466667016347249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,0,1,fp8,fp8,0,0.06200533111890157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,0,1,fp8,fp8,0,0.18256000677744547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,fp8,0,0.06432533264160156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,128,1,fp8,fp8,0,0.06022400160630544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,fp8,0,0.11607467134793599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,0,1,fp8,fp8,0,0.10917333761850993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,float16,0,0.06065600117047628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,float16,0,0.11341333389282227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,fp8,0,0.19238932927449545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,fp8,0,0.06192000210285187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,float16,0,0.11553066968917847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,fp8,0,0.11362133423487346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,0,1,fp8,fp8,0,0.10483733812967937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,float16,0,0.04133866727352142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,float16,0,0.07482133309046428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,fp8,0,0.042821332812309265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,128,1,fp8,fp8,0,0.04011200120051702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,fp8,0,0.07668800155321757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,0,1,fp8,fp8,0,0.07046933472156525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,float16,0,0.040394666294256844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,float16,0,0.06217599908510844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,float16,0,0.07537066439787547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,128,1,fp8,fp8,0,0.039647998909155525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,fp8,0,0.07448533177375793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,0,1,fp8,fp8,0,0.06885866820812225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,float16,0,0.029253333806991577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,float16,0,0.05795733133951823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,fp8,0,0.029781334102153778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,128,1,fp8,fp8,0,0.028463999430338543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,fp8,0,0.057914664347966514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,0,1,fp8,fp8,0,0.05416533350944519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,float16,0,0.02741866558790207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,float16,0,0.05610666672388712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,fp8,0,0.029264000554879505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,128,1,fp8,fp8,0,0.05618133147557577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,fp8,0,0.05788266658782959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,0,1,fp8,fp8,0,0.05189333359400431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,float16,0,0.052383999029795326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,128,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,fp8,0,0.054058666030565895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,0,1,fp8,fp8,0,0.0499839981396993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,float16,0,0.052042668064435325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,128,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,fp8,0,0.05387733379999796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,0,1,fp8,fp8,0,0.05063466727733612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,float16,0,0.023434666295846302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,float16,0,0.052042668064435325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,128,1,fp8,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,0,1,fp8,fp8,0,0.047775998711586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,fp8,0,0.024714666108290356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,fp8,0,0.024058667321999867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,128,1,fp8,fp8,0,0.023706667125225067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,fp8,0,0.05217599868774414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,0,1,fp8,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,fp8,0,0.052426666021347046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,float16,0,0.12998400131861368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,float16,0,0.19829332828521729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,fp8,0,0.1317759950955709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,float16,0,0.05179733534653982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,fp8,0,0.19954133033752441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,0,1,fp8,fp8,0,0.18347734212875366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,float16,0,0.07469866673151652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,float16,0,0.1137600044409434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,fp8,0,0.08002666632334392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,128,1,fp8,fp8,0,0.07549333572387695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,fp8,0,0.12091733018557231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,0,1,fp8,fp8,0,0.109333336353302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,float16,0,0.025008000433444977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,float16,0,0.107205331325531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,fp8,0,0.07453866799672444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,128,1,fp8,fp8,0,0.06846400101979573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,fp8,0,0.11103999614715576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,0,1,fp8,fp8,0,0.103301336367925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,float16,0,0.04438399771849314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,float16,0,0.07266133526961009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,fp8,0,0.047781333327293396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,128,1,fp8,fp8,0,0.04393066465854645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,fp8,0,0.07432533303896587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,0,1,fp8,fp8,0,0.06759466727574666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,float16,0,0.04390400151411692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,float16,0,0.07022400200366974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,float16,0,0.0706879993279775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,128,1,fp8,fp8,0,0.041850666205088295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,fp8,0,0.07052266597747803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,0,1,fp8,fp8,0,0.06635199983914693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,float16,0,0.029717333614826202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,float16,0,0.04773866633574168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,128,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,fp8,0,0.0480320006608963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,0,1,fp8,fp8,0,0.043920000394185386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,float16,0,0.02959466725587845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,float16,0,0.04625066618124644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,128,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,fp8,0,0.04780800143877665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,0,1,fp8,fp8,0,0.04530133306980133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,float16,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,float16,0,0.04188266893227895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,128,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,fp8,0,0.0439573327700297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,0,1,fp8,fp8,0,0.03955733279387156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,float16,0,0.02548266698916753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,float16,0,0.041589332123597465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,128,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,0,1,fp8,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,float16,0,0.03999999910593033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,fp8,0,0.023578666150569916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,128,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,fp8,0,0.0414986660083135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,0,1,fp8,fp8,0,0.037621334195137024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,float16,0,0.02204799900452296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,float16,0,0.03961066653331121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,128,1,fp8,fp8,0,0.021546666820844013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,fp8,0,0.03957866628964742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,fp8,0,0.043882668018341064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,0,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,float16,0,0.023413332800070446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,float16,0,0.039749334255854286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,128,1,fp8,fp8,0,0.12490133444468181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,0,1,fp8,fp8,0,0.03749866783618927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,float16,0,0.02165866643190384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,float16,0,0.039450667798519135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,fp8,0,0.02160533269246419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,128,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,fp8,0,0.04001600046952566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,0,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,float16,0,0.1088746686776479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,float16,0,0.13827199737230936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,128,1,fp8,fp8,0,0.1034346620241801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,fp8,0,0.1395786702632904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,float16,0,0.08570667107899983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,fp8,0,0.06446399788061778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,128,1,fp8,fp8,0,0.060415998101234436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,fp8,0,0.08502399921417236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,fp8,0,0.1074133316675822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,float16,0,0.05937066674232483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,0,1,fp8,fp8,0,0.13396267096201578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,fp8,0,0.06198399762312571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,128,1,fp8,fp8,0,0.05819199979305267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,fp8,0,0.08299200236797333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,0,1,fp8,fp8,0,0.07714666426181793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,float16,0,0.0432640016078949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,float16,0,0.056143999099731445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,fp8,0,0.043925335009892784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,float16,0,0.08196799953778584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,128,1,fp8,fp8,0,0.04164800047874451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,fp8,0,0.055455997586250305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,0,1,fp8,fp8,0,0.05375466744105021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,float16,0,0.039850667119026184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,float16,0,0.05529066423575083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,128,1,fp8,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,fp8,0,0.05550399919350942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,0,1,fp8,fp8,0,0.050213331977526345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,0,1,fp8,fp8,0,0.08038933575153351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,128,1,fp8,fp8,0,0.027632000545660656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,float16,0,0.062234664956728615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,fp8,0,0.03993066648642222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,0,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,float16,0,0.027503999571005504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,float16,0,0.039488000174363456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,fp8,0,0.027664000789324444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,128,1,fp8,fp8,0,0.027552001178264618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,fp8,0,0.040405333042144775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,0,1,fp8,fp8,0,0.03726933399836222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,float16,0,0.035429333647092186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,128,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,fp8,0,0.03751466671625773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,0,1,fp8,fp8,0,0.033786666889985405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,float16,0,0.035487999518712364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,128,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,0,1,fp8,fp8,0,0.031888000667095184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,float16,0,0.02180800090233485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,float16,0,0.0335359995563825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,fp8,0,0.02363733450571696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,128,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,fp8,0,0.035386666655540466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,0,1,fp8,fp8,0,0.031285333136717476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,float16,0,0.03355200091997782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,fp8,0,0.021509334444999695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,fp8,0,0.03515200068553289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,0,1,fp8,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,float16,0,0.02162666618824005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,float16,0,0.03146133323510488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,fp8,0,0.02186666677395503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,fp8,0,0.03372266640265783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,0,1,fp8,fp8,0,0.03173333406448364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,float16,0,0.03161066770553589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,128,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,float16,0,0.039642666776975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,0,1,fp8,fp8,0,0.030576000610987347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,float16,0,0.11122133334477742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,float16,0,0.1329866647720337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,fp8,0,0.10893332958221436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,128,1,fp8,fp8,0,0.10500799616177876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,0,1,fp8,fp8,0,0.12331733107566833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,float16,0,0.06574933230876923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,float16,0,0.07911466558774312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,fp8,0,0.06534933547178905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,128,1,fp8,fp8,0,0.06423466900984447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,fp8,0,0.07733333110809326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,0,1,fp8,fp8,0,0.07487999896208446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,float16,0,0.06444266438484192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,float16,0,0.07632533212502797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,fp8,0,0.06365333497524261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,128,1,fp8,fp8,0,0.06053866446018219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,fp8,0,0.07670400043328603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,float16,0,0.04048000027736028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,float16,0,0.05022400120894114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,128,1,fp8,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,fp8,0,0.048709332942962646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,0,1,fp8,fp8,0,0.049098665515581764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,float16,0,0.037920000652472176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,0,1,fp8,fp8,0,0.07066133121649425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,float16,0,0.04790399968624115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,fp8,0,0.0400693342089653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,128,1,fp8,fp8,0,0.0377813329299291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,0,1,fp8,fp8,0,0.046112000942230225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,float16,0,0.027376001079877216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,float16,0,0.03334933271010717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,fp8,0,0.027589333554108936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,128,1,fp8,fp8,0,0.02678400029738744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,fp8,0,0.03410666684309641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,0,1,fp8,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,float16,0,0.03377600014209747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,fp8,0,0.02741866558790207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,128,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,0,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,float16,0,0.029370665550231934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,128,1,fp8,fp8,0,0.02248000105222066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,fp8,0,0.13216533263524374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,float16,0,0.023584000766277313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,float16,0,0.029498666524887085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,fp8,0,0.029520000020662945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,0,1,fp8,fp8,0,0.029093332588672638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,float16,0,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,128,1,fp8,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,0,1,fp8,fp8,0,0.027514666318893433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,fp8,0,0.027845333019892376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,0,1,fp8,fp8,0,0.025583999852339428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,float16,0,0.021418665846188862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,float16,0,0.027615999182065327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,fp8,0,0.022458667556444805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,128,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,fp8,0,0.02779199928045273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,0,1,fp8,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,float16,0,0.019653332730134327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,128,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,float16,0,0.027162666122118633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,128,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,float16,0,0.02699200063943863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,128,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,0,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,float16,0,0.020949333906173706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,float16,0,0.027130665878454845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,fp8,0,0.02090666691462199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,0,1,fp8,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,fp8,0,0.04044266790151596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,128,1,float16,float16,0,0.1079253355662028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,0,1,float16,float16,0,0.11005333065986633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,128,1,fp8,fp8,0,0.10275200009346008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,0,1,float16,fp8,0,0.10940266648928325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,128,1,float16,float16,0,0.06270933151245117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,0,1,float16,float16,0,0.06344533463319142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,128,1,float16,fp8,0,0.062421331803003945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,128,1,fp8,fp8,0,0.062352001667022705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,0,1,float16,fp8,0,0.06305066744486491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,0,1,fp8,fp8,0,0.061887999375661217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,128,1,float16,float16,0,0.06028800209363302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,0,1,float16,float16,0,0.06156266729036967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,0,1,fp8,fp8,0,0.10152533650398254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,128,1,float16,fp8,0,0.06055466830730438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,128,1,fp8,fp8,0,0.05602133274078369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,0,1,float16,fp8,0,0.062405332922935486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,0,1,fp8,fp8,0,0.056736002365748085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,128,1,float16,float16,0,0.03979199876387914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,0,1,float16,float16,0,0.041034666200478874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,128,1,float16,fp8,0,0.03933866570393244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,128,1,fp8,fp8,0,0.039173332353432976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,0,1,float16,fp8,0,0.0402399996916453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,0,1,fp8,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,128,1,float16,fp8,0,0.10760000348091125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,0,1,float16,float16,0,0.039919999738534294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,128,1,float16,fp8,0,0.04005866746107737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,128,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,0,1,float16,fp8,0,0.04201066493988037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,0,1,fp8,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,128,1,float16,float16,0,0.027664000789324444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,0,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,128,1,float16,fp8,0,0.0277813325325648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,0,1,fp8,fp8,0,0.028463999430338543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,0,1,float16,float16,0,0.028207999964555103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,128,1,float16,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,128,1,float16,float16,0,0.039850667119026184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,128,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,0,1,float16,fp8,0,0.02769600103298823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,0,1,fp8,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,128,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,0,1,float16,float16,0,0.02385599911212921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,128,1,float16,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,128,1,fp8,fp8,0,0.02363733450571696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,0,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,128,1,float16,float16,0,0.023738667368888855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,128,1,float16,float16,0,0.02735999971628189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,128,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,128,1,fp8,fp8,0,0.021642667551835377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,0,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,0,1,fp8,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,128,1,float16,float16,0,0.020938667158285778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,128,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,0,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,0,1,fp8,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,128,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,0,1,float16,float16,0,0.023269332945346832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,128,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,128,1,fp8,fp8,0,0.020917333662509918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,0,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,0,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,128,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,0,1,float16,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,128,1,float16,float16,0,0.019493332753578823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,128,1,float16,fp8,0,0.019893333315849304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,128,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,0,1,fp8,fp8,0,0.01993600030740102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,128,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,128,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,0,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,128,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,0,1,float16,float16,0,0.021488000949223835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,128,1,float16,fp8,0,0.019653332730134327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,128,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,128,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,128,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,0,1,float16,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,128,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,0,1,float16,float16,0,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,128,1,float16,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,128,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,0,1,float16,fp8,0,0.023770667612552643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,128,1,float16,float16,0,0.056847999493281044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,128,1,fp8,fp8,0,0.02777066578467687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,128,1,float16,fp8,0,0.05750933289527893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,128,1,fp8,fp8,0,0.05443733433882395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,0,1,float16,fp8,0,0.056261335810025535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,0,1,fp8,fp8,0,0.05295466880003611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,0,1,float16,float16,0,0.03555200000603994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,128,1,float16,fp8,0,0.037621334195137024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,128,1,fp8,fp8,0,0.03603200117746989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,0,1,float16,fp8,0,0.03595733394225439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,0,1,float16,float16,0,0.05643733342488607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,128,1,float16,float16,0,0.03570666660865148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,128,1,float16,float16,0,0.03809600075085958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,0,1,float16,float16,0,0.03549333413441976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,128,1,float16,fp8,0,0.035717333356539406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,0,1,float16,fp8,0,0.035562666753927864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,0,1,fp8,fp8,0,0.033376000821590424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,128,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,0,1,fp8,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,128,1,float16,fp8,0,0.025685332715511322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,128,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,0,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,0,1,float16,fp8,0,0.02569599946339925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,128,1,float16,float16,0,0.025429333249727886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,0,1,float16,float16,0,0.025519999365011852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,128,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,128,1,fp8,fp8,0,0.025973332424958546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,0,1,fp8,fp8,0,0.02387733260790507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,0,1,float16,float16,0,0.025146665672461193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,0,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,128,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,128,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,128,1,fp8,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,0,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,0,1,float16,fp8,0,0.02405333270629247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,0,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,128,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,128,1,float16,float16,0,0.02181866765022278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,128,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,0,1,float16,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,0,1,fp8,fp8,0,0.019679999599854153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,0,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,128,1,float16,fp8,0,0.0198186660806338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,0,1,float16,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,128,1,float16,float16,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,128,1,float16,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,0,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,128,1,float16,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,128,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,0,1,float16,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,128,1,float16,float16,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,0,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,128,1,float16,float16,0,0.018719999740521114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,128,1,float16,fp8,0,0.018719999740521114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,0,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,0,1,fp8,fp8,0,0.018005333840847015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,0,1,float16,float16,0,0.018613333503405254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,128,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,128,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,128,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,128,1,float16,fp8,0,0.02088533341884613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,128,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,0,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,128,1,float16,fp8,0,0.019632000476121902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,0,1,float16,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,128,1,float16,float16,0,0.037589333951473236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,0,1,float16,float16,0,0.037647999823093414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,128,1,float16,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,128,1,fp8,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,0,1,float16,fp8,0,0.03812266637881597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,0,1,fp8,fp8,0,0.03674133370320002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,128,1,float16,float16,0,0.02739733209212621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,0,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,128,1,float16,fp8,0,0.02643200010061264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,128,1,fp8,fp8,0,0.026928000152111053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,0,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,0,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,128,1,float16,float16,0,0.026133333643277485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,0,1,float16,float16,0,0.02534399926662445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,128,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,128,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,128,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,0,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,128,1,float16,fp8,0,0.019861333072185516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,128,1,fp8,fp8,0,0.019706666469573975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,0,1,float16,float16,0,0.01886933296918869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,128,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,128,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,0,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,128,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,0,1,float16,float16,0,0.017616000026464462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,128,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,128,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,128,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,128,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,0,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,128,1,float16,float16,0,0.015493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,128,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,128,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,0,1,fp8,fp8,0,0.016063999384641647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,128,1,float16,fp8,0,0.01635733370979627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,128,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,128,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,128,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,128,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,128,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,128,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,0,1,float16,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,0,1,fp8,fp8,0,0.016538667182127636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,128,1,float16,float16,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,128,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,128,1,fp8,fp8,0,0.01637866720557213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,128,1,float16,float16,0,0.016197333733240765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,0,1,float16,float16,0,0.016063999384641647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,128,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,0,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,128,1,float16,float16,0,0.03154666721820831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,0,1,float16,float16,0,0.03137599925200144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,128,1,float16,fp8,0,0.02977066735426585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,128,1,fp8,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,0,1,float16,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,0,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,128,1,float16,float16,0,0.021594665944576263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,0,1,float16,float16,0,0.021669333179791767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,128,1,float16,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,128,1,fp8,fp8,0,0.021530665457248688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,0,1,float16,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,0,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,128,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,128,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,128,1,fp8,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,0,1,fp8,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,128,1,float16,float16,0,0.017445333302021027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,128,1,float16,fp8,0,0.017797333498795826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,128,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,128,1,float16,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,0,1,float16,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,128,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,0,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,128,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,128,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,128,1,fp8,fp8,0,0.01595199977358182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,128,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,0,1,float16,float16,0,0.01624533285697301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,128,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,128,1,float16,float16,0,0.015578666081031164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,128,1,float16,float16,0,0.027552001178264618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,0,1,float16,float16,0,0.027503999571005504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,128,1,float16,fp8,0,0.027749332288901012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,0,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,128,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,128,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,128,1,float16,float16,0,0.02045866722861926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,0,1,float16,float16,0,0.019653332730134327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,128,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,0,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,128,1,float16,float16,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,0,1,float16,float16,0,0.015861333658297855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,128,1,float16,fp8,0,0.021562665700912476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,128,1,float16,float16,0,0.017498667041460674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,0,1,float16,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,128,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,0,1,float16,float16,0,0.018624000251293182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,128,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,128,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,128,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,0,1,float16,float16,0,0.014725333700577417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,128,1,float16,fp8,0,0.016399999459584553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,128,1,fp8,fp8,0,0.01578666642308235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,128,1,fp8,fp8,0,0.016501333564519882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,0,1,fp8,fp8,0,0.01609066625436147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,128,1,fp8,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,128,1,float16,fp8,0,0.016586666305859882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,128,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,0,1,float16,fp8,0,0.016704000532627106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,128,1,float16,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,0,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,128,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,0,1,float16,float16,0,0.025498665869235992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,128,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,128,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,0,1,float16,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,128,1,fp8,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,0,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,128,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,128,1,float16,fp8,0,0.019930666933457058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,128,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,0,1,float16,fp8,0,0.019706666469573975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,0,1,fp8,fp8,0,0.02035733312368393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,0,1,float16,float16,0,0.018965333700180054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,0,1,float16,fp8,0,0.020080000162124634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,0,1,float16,float16,0,0.016751999656359356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,128,1,fp8,fp8,0,0.016645333419243496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,128,1,fp8,fp8,0,0.015647999942302704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,128,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,0,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,128,1,float16,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,128,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,0,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,128,1,fp8,fp8,0,0.01603200038274129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,0,1,fp8,fp8,0,0.020037333170572918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,0,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,0,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,128,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,128,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,128,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,0,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,128,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,128,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,128,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,0,1,float16,float16,0,0.016224000602960587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,128,1,float16,fp8,0,0.014783999572197596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,128,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,64,0,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,128,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,128,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,0,0.018272000054518383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,128,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,64,0,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,128,1,float16,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,128,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,128,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,128,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,0,0.015919999529918034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,64,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,128,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,64,128,1,float16,float16,0,0.017685333887736004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,float16,0,0.04677333434422811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,128,1,fp8,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,fp8,0,0.30213866631189984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,0,1,fp8,fp8,0,0.27590399980545044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,float16,0,0.3017546733220418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,float16,0,0.033717334270477295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,fp8,0,0.048021331429481506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,float16,0,0.20427733659744263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,128,1,fp8,fp8,0,0.032229334115982056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,0,1,fp8,fp8,0,0.1872373421986898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,float16,0,0.02961066613594691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,float16,0,0.20137600104014078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,128,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,fp8,0,0.20132799943288168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,0,1,fp8,fp8,0,0.18381865819295248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,float16,0,0.04182933270931244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,float16,0,0.19617599248886108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,128,1,fp8,fp8,0,0.04177066683769226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,fp8,0,0.20468799273173013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,fp8,0,0.1963626742362976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,0,1,fp8,fp8,0,0.18106667200724283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,float16,0,0.029472000896930695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,float16,0,0.15477333466211954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,128,1,fp8,fp8,0,0.028938665986061096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,fp8,0,0.1568106710910797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,0,1,fp8,fp8,0,0.14421866337458292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,float16,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,float16,0,0.15361066659291586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,128,1,fp8,fp8,0,0.025605333348115284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,fp8,0,0.1532853345076243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,0,1,fp8,fp8,0,0.14215999841690063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,float16,0,0.0400693342089653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,fp8,0,0.04399999976158142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,float16,0,0.14733333388964334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,fp8,0,0.04011200120051702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,128,1,fp8,fp8,0,0.03810133288304011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,fp8,0,0.028010666370391846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,0,1,fp8,fp8,0,0.1361066699028015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,float16,0,0.029530666768550873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,float16,0,0.13286399841308594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,128,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,fp8,0,0.1341333289941152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,0,1,fp8,fp8,0,0.12223466237386067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,float16,0,0.1304693321386973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,128,1,fp8,fp8,0,0.025759999950726826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,fp8,0,0.14628266294797262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,0,1,fp8,fp8,0,0.11964799960454305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,float16,0,0.04558933277924856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,float16,0,0.17238932847976685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,fp8,0,0.04770666857560476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,128,1,fp8,fp8,0,0.04569066564242045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,fp8,0,0.17427732547124228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,0,1,fp8,fp8,0,0.16051733493804932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,fp8,0,0.13222933808962503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,float16,0,0.11442666252454121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,128,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,0,1,fp8,fp8,0,0.1065013309319814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,float16,0,0.02737066646416982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,float16,0,0.1106666624546051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,128,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,fp8,0,0.10972266395886739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,0,1,fp8,fp8,0,0.10109866658846538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,float16,0,0.10876799623171489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,128,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,fp8,0,0.10753066341082256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,0,1,fp8,fp8,0,0.09920533498128255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,float16,0,0.041536000867684685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,float16,0,0.029578665892283123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,float16,0,0.11564266681671143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,fp8,0,0.0439626673857371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,128,1,fp8,fp8,0,0.04111466556787491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,fp8,0,0.1181173324584961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,0,1,fp8,fp8,0,0.10864532987276714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,128,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,fp8,0,0.09115733702977498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,0,1,fp8,fp8,0,0.08515733480453491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,fp8,0,0.11402133107185364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,float16,0,0.08682666222254436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,128,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,float16,0,0.09085866808891296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,0,1,fp8,fp8,0,0.07954666515191396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,float16,0,0.023210667073726654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,float16,0,0.0849226713180542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,128,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,fp8,0,0.08504000306129456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,float16,0,0.029477333029111225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,0,1,fp8,fp8,0,0.07845866680145264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,fp8,0,0.08686932921409607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,float16,0,0.1053546667098999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,fp8,0,0.047744000951449074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,128,1,fp8,fp8,0,0.04404800136884054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,fp8,0,0.10797866185506184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,0,1,fp8,fp8,0,0.09922666351000468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,float16,0,0.0705973356962204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,fp8,0,0.03178133318821589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,128,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,fp8,0,0.0708000014225642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,float16,0,0.04602666695912679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,0,1,fp8,fp8,0,0.06440000236034393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,float16,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,float16,0,0.06560533245404561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,128,1,fp8,fp8,0,0.023685333629449207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,fp8,0,0.0651146670182546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,0,1,fp8,fp8,0,0.06039999922116598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,float16,0,0.02378133436044057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,float16,0,0.06398400167624156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,fp8,0,0.024080000817775726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,fp8,0,0.06276800235112508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,0,1,fp8,fp8,0,0.058746665716171265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,float16,0,0.021642667551835377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,float16,0,0.06221333146095276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,fp8,0,0.06412266691525777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,0,1,fp8,fp8,0,0.058042665322621666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,float16,0,0.042223999897638954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,float16,0,0.0743146687746048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,128,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,128,1,fp8,fp8,0,0.039642666776975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,fp8,0,0.07672533392906189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,0,1,fp8,fp8,0,0.06859200199445088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,float16,0,0.027301333844661713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,float16,0,0.056346664826075234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,fp8,0,0.028517333169778187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,128,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,fp8,0,0.05832533538341522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,0,1,fp8,fp8,0,0.053861334919929504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,float16,0,0.05204799771308899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,128,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,fp8,0,0.054048001766204834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,0,1,fp8,fp8,0,0.04982399940490723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,float16,0,0.05175999800364176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,fp8,0,0.021925332645575207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,128,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,fp8,0,0.05049066742261251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,0,1,fp8,fp8,0,0.04779199759165446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,float16,0,0.05000533163547516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,fp8,0,0.05008533100287119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,0,1,fp8,fp8,0,0.045978665351867676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,float16,0,0.02941333254178365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,float16,0,0.07261333366235097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,fp8,0,0.04834666848182678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,128,1,fp8,fp8,0,0.044346665342648826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,fp8,0,0.07521066566308339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,0,1,fp8,fp8,0,0.06843733290831248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,float16,0,0.03137599925200144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,float16,0,0.04790933430194855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,128,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,fp8,0,0.04985600213209788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,0,1,fp8,fp8,0,0.046309332052866616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,float16,0,0.04193066557248434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,float16,0,0.04771199822425842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,fp8,0,0.04365866879622141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,0,1,fp8,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,float16,0,0.023157333334287006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,float16,0,0.04171733558177948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,128,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,fp8,0,0.03977066775163015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,0,1,fp8,fp8,0,0.03782933453718821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,float16,0,0.021477334201335907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,float16,0,0.039664000272750854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,fp8,0,0.022543999056021374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,128,1,fp8,fp8,0,0.02086399992307027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,0,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,float16,0,0.03772799919048945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,fp8,0,0.03782933453718821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,0,1,fp8,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,float16,0,0.04353600243727366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,float16,0,0.055919999877611794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,fp8,0,0.04381333291530609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,128,1,fp8,fp8,0,0.041562666495641075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,fp8,0,0.056015998125076294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,fp8,0,0.04378666480382284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,0,1,fp8,fp8,0,0.05221333106358846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,float16,0,0.03987200061480204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,fp8,0,0.02958400050799052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,128,1,fp8,fp8,0,0.027818667391935985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,fp8,0,0.041077333192030586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,0,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,float16,0,0.023290666441122692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,float16,0,0.03533333291610082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,128,1,fp8,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,128,1,fp8,fp8,0,0.024453334510326385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,0,1,fp8,fp8,0,0.03348266581694285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,float16,0,0.035786665976047516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,fp8,0,0.021829334398110706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,0,1,fp8,fp8,0,0.03158933420976003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,float16,0,0.032069332897663116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,128,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,fp8,0,0.033285332222779594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,0,1,fp8,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,float16,0,0.019999999552965164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,float16,0,0.032144000132878624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,fp8,0,0.03734400123357773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,0,1,fp8,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,float16,0,0.03952533255020777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,float16,0,0.04987200101216634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,128,1,fp8,fp8,0,0.0220320001244545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,128,1,fp8,fp8,0,0.03749866783618927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,fp8,0,0.05064533154169718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,0,1,fp8,fp8,0,0.046096002062161766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,float16,0,0.027562665442625683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,fp8,0,0.02812266598145167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,128,1,fp8,fp8,0,0.027765333652496338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,fp8,0,0.033546666304270424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,0,1,fp8,fp8,0,0.0327360009153684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,fp8,0,0.023567999402681988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,128,1,fp8,fp8,0,0.022143999735514324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,fp8,0,0.029968000948429108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,0,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,128,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,0,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,float16,0,0.020501332978407543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,fp8,0,0.021562665700912476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,fp8,0,0.02741866558790207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,0,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,fp8,0,0.03990933299064636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,float16,0,0.025498665869235992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,0,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,float16,0,0.020879998803138733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,float16,0,0.027210667729377747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,128,1,fp8,fp8,0,0.020096000283956528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,0,1,fp8,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,128,1,float16,float16,0,0.03967999915281931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,0,1,float16,float16,0,0.04014399896065394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,128,1,float16,fp8,0,0.039647998909155525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,128,1,fp8,fp8,0,0.037621334195137024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,float16,0,0.02089600016673406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,0,1,float16,fp8,0,0.04165866722663244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,0,1,fp8,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,0,1,float16,float16,0,0.02789866675933202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,128,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,0,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,0,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,128,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,0,1,float16,float16,0,0.02420266717672348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,128,1,float16,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,128,1,fp8,fp8,0,0.022096000611782074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,0,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,0,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,0,1,float16,float16,0,0.02180800090233485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,128,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,float16,0,0.03332799921433131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,128,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,128,1,fp8,fp8,0,0.020021333048741024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,128,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,128,1,float16,float16,0,0.01942933350801468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,128,1,float16,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,128,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,0,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,0,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,128,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,0,1,float16,float16,0,0.021669333179791767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,128,1,fp8,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,0,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,0,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,128,1,float16,float16,0,0.02584533393383026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,0,1,float16,float16,0,0.02103466788927714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,128,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,128,1,fp8,fp8,0,0.020773333807786305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,128,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,0,1,float16,float16,0,0.02346133440732956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,128,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,128,1,fp8,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,0,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,0,1,fp8,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,128,1,float16,float16,0,0.02146133283774058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,0,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,0,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,0,1,fp8,fp8,0,0.02062400057911873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,128,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,0,1,float16,float16,0,0.01940800001223882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,128,1,float16,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,0,1,float16,fp8,0,0.020373333245515823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,0,1,float16,float16,0,0.018453333526849747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,128,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,0,1,float16,fp8,0,0.01876266673207283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,128,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,128,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,0,1,float16,fp8,0,0.017978666971127193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,0,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,128,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,0,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,128,1,fp8,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,0,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,128,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,0,1,float16,float16,0,0.018837332725524902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,128,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,128,1,fp8,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,128,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,128,1,fp8,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,128,1,fp8,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,0,1,float16,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,0,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,128,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,0,1,float16,fp8,0,0.016634666671355564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,0,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,0,1,fp8,fp8,0,0.015674666812022526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,128,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,128,1,fp8,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,128,1,float16,fp8,0,0.017690667261679966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,128,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,128,1,float16,fp8,0,0.0162773331006368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,0,1,float16,float16,0,0.016757333030303318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,128,1,float16,float16,0,0.016117333124081295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,128,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,0,1,float16,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,128,1,float16,float16,0,0.01634666696190834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,0,1,float16,float16,0,0.015925332903862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,128,1,fp8,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,128,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,128,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,0,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,0,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,0,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,0,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,0,1,float16,fp8,0,0.016575999557971954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,0,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,128,1,float16,float16,0,0.016682667036851246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,128,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,0,1,float16,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,128,1,float16,float16,0,0.015743999431530636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,128,1,fp8,fp8,0,0.01659199967980385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,0,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,128,1,float16,float16,0,0.015749332805474598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,128,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,128,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,128,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,0,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,128,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,0,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,128,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,64,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,128,1,float16,float16,0,0.015642666568358738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,0,0.016341333587964375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,64,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,128,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,128,1,float16,float16,0,0.016575999557971954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,64,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,64,128,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,0,1,fp8,fp8,0,33.56965891520182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,0,1,float16,float16,0,46.01624552408854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,0,1,float16,fp8,0,45.64826965332031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,0,1,float16,float16,0,45.757853190104164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,0,1,float16,fp8,0,43.550923665364586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,0,1,fp8,fp8,0,34.228474934895836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,0,1,float16,float16,0,45.18840026855469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,0,1,float16,fp8,0,47.31346638997396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,float16,0,23.563270568847656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,0,1,fp8,fp8,0,33.93291727701823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,fp8,0,23.53893788655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,float16,0,21.510724385579426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,0,1,fp8,fp8,0,17.018421173095703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,fp8,0,22.516886393229168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,float16,0,22.730939229329426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,0,1,fp8,fp8,0,16.884122212727863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,fp8,0,22.87652333577474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,0,1,fp8,fp8,0,17.06782404581706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,fp8,0,22.094383239746094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,float16,0,22.240638732910156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,float16,0,12.385055541992188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,0,1,fp8,fp8,0,8.995349248250326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,fp8,0,11.623114267985025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,0,1,fp8,fp8,0,8.688144048055014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,float16,0,11.244661966959635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,fp8,0,11.00388209025065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,0,1,fp8,fp8,0,8.800506591796875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,float16,0,11.080266316731771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,fp8,0,11.174400329589844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,0,1,fp8,fp8,0,8.822133382161459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,float16,0,11.49826685587565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,fp8,0,11.549524943033854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,float16,0,5.837557474772136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,fp8,0,6.218757629394531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,0,1,fp8,fp8,0,4.780074755350749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,float16,0,5.557696024576823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,fp8,0,5.774277369181315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,0,1,fp8,fp8,0,4.645423889160156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,float16,0,5.603839874267578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,fp8,0,5.5498402913411455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,0,1,fp8,fp8,0,4.645402590433757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,float16,0,5.620351791381836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,fp8,0,5.621573130289714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,0,1,fp8,fp8,0,4.652592023213704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,0,1,fp8,fp8,0,19.752335866292317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,float16,0,26.349461873372395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,fp8,0,26.759012858072918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,0,1,fp8,fp8,0,20.137696584065754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,float16,0,26.040054321289062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,fp8,0,25.862144470214844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,0,1,fp8,fp8,0,20.100884755452473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,float16,0,26.31822967529297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,fp8,0,25.44988759358724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,0,1,fp8,fp8,0,10.55018679300944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,float16,0,14.160474141438803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,fp8,0,13.936522165934244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,float16,0,12.754533131917318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,0,1,fp8,fp8,0,10.205685297648111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,fp8,0,13.138277689615885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,0,1,fp8,fp8,0,10.089738845825195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,float16,0,13.237733205159506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,fp8,0,13.137690226236979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,0,1,fp8,fp8,0,10.23036257425944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,float16,0,13.422991434733072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,fp8,0,12.758421579996744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,float16,0,6.8322188059488935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,0,1,fp8,fp8,0,5.478848139444987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,fp8,0,7.076122919718425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,float16,0,6.774330774943034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,0,1,fp8,fp8,0,5.263541221618652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,fp8,0,6.334618886311849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,float16,0,6.8208802541097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,0,1,fp8,fp8,0,5.251296043395996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,fp8,0,6.481407801310222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,float16,0,6.67633056640625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,fp8,0,6.301968256632487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,float16,0,3.5962613423665366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,0,1,fp8,fp8,0,5.253424008687337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,0,1,fp8,fp8,0,2.9644320805867515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,float16,0,3.217712084452311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,fp8,0,3.3230454126993814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,0,1,fp8,fp8,0,2.884026527404785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,fp8,0,3.4487574895222983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,float16,0,3.3729918797810874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,0,1,fp8,fp8,0,2.851578712463379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,float16,0,3.261882781982422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,fp8,0,3.2051146825154624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,0,1,fp8,fp8,0,2.8575731913248696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,fp8,0,3.3087892532348633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,float16,0,17.320091247558594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,0,1,fp8,fp8,0,14.298863728841146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,fp8,0,19.1244379679362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,0,1,fp8,fp8,0,14.49829355875651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,float16,0,18.258443196614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,fp8,0,19.698362986246746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,0,1,fp8,fp8,0,14.412378946940104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,fp8,0,18.13587697347005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,float16,0,19.689573923746746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,float16,0,10.279216130574545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,fp8,0,9.759477615356445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,0,1,fp8,fp8,0,7.708650588989258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,float16,0,9.608698527018229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,fp8,0,8.985621134440104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,0,1,fp8,fp8,0,7.304773330688477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,0,1,fp8,fp8,0,7.327002843221028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,float16,0,9.457520167032877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,fp8,0,9.09065055847168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,0,1,fp8,fp8,0,7.332906723022461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,float16,0,9.485845565795898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,fp8,0,9.670197168986002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,float16,0,4.764437357584636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,0,1,fp8,fp8,0,4.045413335164388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,fp8,0,4.955658594767253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,float16,0,4.77128537495931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,fp8,0,4.596154530843099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,0,1,fp8,fp8,0,3.8414131800333657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,float16,0,4.525973320007324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,0,1,fp8,fp8,0,3.8472534815470376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,fp8,0,4.70904000600179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,float16,0,4.668661435445149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,0,1,fp8,fp8,0,3.8566080729166665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,fp8,0,4.687674522399902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,float16,0,2.5263306299845376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,fp8,0,2.5293919245402017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,float16,0,2.4303305943806968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,fp8,0,2.390117327372233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,0,1,fp8,fp8,0,2.1113333702087402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,float16,0,2.4123892784118652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,0,1,fp8,fp8,0,2.210927963256836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,fp8,0,2.3526879946390786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,0,1,fp8,fp8,0,2.115386644999186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,float16,0,2.3685545921325684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,fp8,0,2.353871981302897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,0,1,fp8,fp8,0,2.1146507263183594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,0,1,fp8,fp8,0,19.233685811360676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,0,1,float16,float16,0,26.344629923502605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,0,1,float16,fp8,0,25.877349853515625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,0,1,float16,float16,0,26.041893005371094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,0,1,fp8,fp8,0,19.371477762858074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,0,1,float16,fp8,0,24.847318013509113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,0,1,float16,float16,0,25.8863042195638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,0,1,float16,fp8,0,25.752522786458332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,float16,0,13.177754720052084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,0,1,fp8,fp8,0,19.63831965128581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,fp8,0,13.052480061848959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,float16,0,11.99013900756836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,0,1,fp8,fp8,0,9.690207799275717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,fp8,0,12.277887980143229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,float16,0,12.320826212565104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,0,1,fp8,fp8,0,9.878837585449219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,fp8,0,12.411125183105469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,float16,0,12.289403279622396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,0,1,fp8,fp8,0,9.900154749552408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,fp8,0,12.402576446533203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,float16,0,6.3377119700113935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,0,1,fp8,fp8,0,5.362848281860352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,fp8,0,6.617029190063477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,float16,0,6.304938634236653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,0,1,fp8,fp8,0,4.98527463277181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,fp8,0,6.4253495534261065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,float16,0,6.183050791422526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,0,1,fp8,fp8,0,5.006869316101074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,fp8,0,6.186703999837239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,float16,0,6.263141632080078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,0,1,fp8,fp8,0,5.008154551188151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,fp8,0,6.263258616129558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,float16,0,3.2774880727132163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,fp8,0,3.3138561248779297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,0,1,fp8,fp8,0,2.799258550008138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,float16,0,3.0410827000935874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,fp8,0,3.1048107147216797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,float16,0,2.8874292373657227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,fp8,0,3.0358667373657227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,0,1,fp8,fp8,0,2.647610664367676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,0,1,fp8,fp8,0,2.646378676096598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,float16,0,3.0214719772338867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,fp8,0,2.9544105529785156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,0,1,fp8,fp8,0,2.6572052637736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,fp8,0,1.6920852661132812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,float16,0,1.710975964864095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,float16,0,1.6009227434794109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,0,1,fp8,fp8,0,1.540357271830241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,fp8,0,1.6149120330810547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,0,1,fp8,fp8,0,1.4890185991923015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,float16,0,1.607386589050293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,0,1,fp8,fp8,0,1.4858400026957195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,float16,0,1.6094560623168945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,fp8,0,1.6193973223368328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,fp8,0,1.611786683400472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,0,1,fp8,fp8,0,1.4842987060546875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,0,1,fp8,fp8,0,11.7566769917806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,float16,0,14.189620971679688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,fp8,0,14.293898264567057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,0,1,fp8,fp8,0,11.965733846028646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,0,1,float16,float16,0,15.033770243326822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,0,1,float16,fp8,0,15.049748738606771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,0,1,fp8,fp8,0,11.828608194986979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,0,1,float16,float16,0,14.935413360595703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,0,1,float16,fp8,0,15.09104029337565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,float16,0,7.679637273152669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,0,1,fp8,fp8,0,6.436117172241211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,fp8,0,8.195178349812826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,0,1,fp8,fp8,0,5.957743962605794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,fp8,0,7.301978429158528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,float16,0,7.476463953653972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,float16,0,7.240826924641927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,fp8,0,7.300021489461263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,0,1,fp8,fp8,0,5.974576314290364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,float16,0,7.299914677937825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,fp8,0,7.664000193277995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,float16,0,3.975184122721354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,0,1,fp8,fp8,0,5.98961067199707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,fp8,0,3.982874552408854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,0,1,fp8,fp8,0,3.3228321075439453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,float16,0,3.567829449971517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,fp8,0,3.72437318166097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,0,1,fp8,fp8,0,3.0911200841267905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,float16,0,3.519008000691732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,fp8,0,3.777525266011556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,0,1,fp8,fp8,0,3.098736127217611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,float16,0,3.5934880574544272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,fp8,0,3.8296054204305015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,0,1,fp8,fp8,0,3.1056321461995444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,float16,0,1.9356427192687988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,fp8,0,2.040506680806478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,0,1,fp8,fp8,0,1.7758827209472656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,float16,0,1.836176077524821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,fp8,0,1.8310933113098145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,0,1,fp8,fp8,0,1.6619200706481934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,float16,0,1.8427093823750813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,fp8,0,1.8665547370910645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,0,1,fp8,fp8,0,1.6656479835510254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,float16,0,1.8318880399068196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,fp8,0,1.863082726796468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,float16,0,1.0770080089569092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,fp8,0,1.0811573664347331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,0,1,fp8,fp8,0,1.0339093208312988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,float16,0,1.0289173126220703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,fp8,0,1.0233813126881917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,0,1,fp8,fp8,0,1.6729440689086914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,0,1,fp8,fp8,0,0.9508533477783203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,float16,0,1.0271039803822835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,fp8,0,1.027173360188802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,0,1,fp8,fp8,0,0.9534666538238525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,float16,0,1.0270079771677654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,fp8,0,1.035200039545695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,0,1,fp8,fp8,0,0.9553813139597574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,0,1,fp8,fp8,0,12.145195007324219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,0,1,float16,float16,0,13.973258972167969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,0,1,float16,fp8,0,14.356639862060547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,0,1,fp8,fp8,0,12.201498667399088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,0,1,float16,float16,0,14.85256576538086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,0,1,float16,fp8,0,15.263936360677084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,0,1,fp8,fp8,0,12.223562876383463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,0,1,float16,float16,0,14.826218922932943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,0,1,float16,fp8,0,14.869466145833334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,float16,0,7.620325088500977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,fp8,0,8.230037053426107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,float16,0,6.849834442138672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,fp8,0,7.187493642171224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,0,1,fp8,fp8,0,6.045536041259766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,float16,0,7.093530654907227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,fp8,0,7.070181528727214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,0,1,fp8,fp8,0,6.067424138387044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,float16,0,7.130133310953776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,fp8,0,7.142821629842122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,0,1,fp8,fp8,0,6.101408004760742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,float16,0,4.0722611745198565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,fp8,0,3.8613494237264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,0,1,fp8,fp8,0,3.4157225290934243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,float16,0,3.5219039916992188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,fp8,0,3.5657758712768555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,0,1,fp8,fp8,0,3.1066932678222656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,float16,0,3.6638027826944985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,fp8,0,3.521365483601888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,0,1,fp8,fp8,0,3.1045281092325845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,float16,0,3.547349294026693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,fp8,0,3.6503467559814453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,0,1,fp8,fp8,0,3.1182613372802734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,float16,0,1.9410559336344402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,fp8,0,1.950592041015625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,0,1,fp8,fp8,0,1.7845652898152669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,float16,0,1.795413335164388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,fp8,0,1.7803786595662434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,0,1,fp8,fp8,0,1.6286986668904622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,float16,0,1.7900373140970867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,fp8,0,1.8039199511210124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,0,1,fp8,fp8,0,1.6316372553507488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,float16,0,1.8332160313924153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,fp8,0,1.8015999794006348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,0,1,fp8,fp8,0,1.6426026026407878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,fp8,0,1.049829324086507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,0,1,fp8,fp8,0,0.9702293078104655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,float16,0,1.0434400240580242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,float16,0,0.973861296971639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,fp8,0,0.97325332959493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,0,1,fp8,fp8,0,0.8966026306152344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,float16,0,0.9706506729125977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,fp8,0,0.9770613511403402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,0,1,fp8,fp8,0,0.8966773351033529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,float16,0,0.9796586831410726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,fp8,0,0.9749333063761393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,0,1,fp8,fp8,0,0.9035840034484863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,float16,0,0.5942506790161133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,fp8,0,0.6063093344370524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,0,1,fp8,fp8,0,0.5643253326416016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,float16,0,0.5655893484751383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,fp8,0,0.5699520111083984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,0,1,fp8,fp8,0,0.5290400187174479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,float16,0,0.5708586772282919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,0,1,fp8,fp8,0,0.5318559805552164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,float16,0,0.5720106760660807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,fp8,0,0.5724426507949829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,fp8,0,0.5726026693979899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,0,1,fp8,fp8,0,0.5333439906438192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,0,1,fp8,fp8,0,7.685312271118164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,0,1,float16,float16,0,9.022298812866211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,0,1,float16,fp8,0,9.230293273925781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,0,1,float16,float16,0,8.80728530883789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,0,1,float16,fp8,0,9.00485865275065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,0,1,fp8,fp8,0,7.708698908487956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,0,1,float16,float16,0,8.78994115193685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,0,1,float16,fp8,0,9.412453333536783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,float16,0,4.862282752990723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,0,1,fp8,fp8,0,7.748741149902344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,fp8,0,4.989472071329753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,0,1,fp8,fp8,0,4.325152079264323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,float16,0,4.406442642211914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,0,1,fp8,fp8,0,3.8634827931722007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,fp8,0,4.618869463602702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,float16,0,4.314511934916179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,fp8,0,4.3628692626953125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,0,1,fp8,fp8,0,3.8784958521525064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,float16,0,4.5318559010823565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,fp8,0,4.356906572977702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,0,1,fp8,fp8,0,3.901968002319336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,float16,0,2.4701760609944663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,fp8,0,2.4525866508483887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,float16,0,2.203813393910726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,fp8,0,2.1935253143310547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,0,1,fp8,fp8,0,2.2303519248962402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,float16,0,2.2035039265950522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,0,1,fp8,fp8,0,1.9980106353759766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,0,1,fp8,fp8,0,2.0022667249043784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,float16,0,2.2218400637308755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,fp8,0,2.200383981068929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,fp8,0,2.2182933489481607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,0,1,fp8,fp8,0,2.012821356455485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,float16,0,1.2510826587677002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,fp8,0,1.2820266882578533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,0,1,fp8,fp8,0,1.1749333540598552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,float16,0,1.156874656677246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,fp8,0,1.1577386856079102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,0,1,fp8,fp8,0,1.0606506665547688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,float16,0,1.1558612982432048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,fp8,0,1.162010669708252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,0,1,fp8,fp8,0,1.064197301864624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,float16,0,1.1714773178100586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,fp8,0,1.1678133010864258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,0,1,fp8,fp8,0,1.0705173015594482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,fp8,0,0.701136032740275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,0,1,fp8,fp8,0,0.652453343073527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,float16,0,0.6386346817016602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,fp8,0,0.6557120084762573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,0,1,fp8,fp8,0,0.5958133141199747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,float16,0,0.6515946785608927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,fp8,0,0.6470079819361368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,float16,0,0.6890292962392172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,0,1,fp8,fp8,0,0.5980159838994344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,float16,0,0.6473439931869507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,fp8,0,0.6536426544189453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,0,1,fp8,fp8,0,0.5994133154551188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,float16,0,0.4060800075531006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,fp8,0,0.4147253433863322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,0,1,fp8,fp8,0,0.38730132579803467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,float16,0,0.3840159972508748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,fp8,0,0.38543999195098877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,float16,0,0.38550400733947754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,fp8,0,0.38655467828114826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,0,1,fp8,fp8,0,0.36184000968933105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,float16,0,0.3887999852498372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,fp8,0,0.39028267065684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,0,1,fp8,fp8,0,0.3622666597366333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,0,1,fp8,fp8,0,0.3628106514612834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,0,1,float16,float16,0,9.752021153767904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,0,1,float16,fp8,0,9.49838383992513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,0,1,fp8,fp8,0,8.433392206827799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,0,1,float16,float16,0,9.918298721313477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,0,1,fp8,fp8,0,8.48084831237793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,0,1,float16,fp8,0,9.62491226196289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,0,1,float16,float16,0,9.748720169067383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,0,1,float16,fp8,0,9.66485341389974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,0,1,fp8,fp8,0,8.54417610168457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,float16,0,5.725472132364909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,fp8,0,5.537424087524414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,float16,0,4.760474522908528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,fp8,0,4.724874814351399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,0,1,fp8,fp8,0,4.194229443868001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,float16,0,4.765989303588867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,fp8,0,4.698479970296224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,0,1,fp8,fp8,0,4.2139892578125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,float16,0,4.894799868265788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,fp8,0,4.954496065775554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,0,1,fp8,fp8,0,4.23909854888916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,float16,0,2.668645222981771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,fp8,0,2.7297226587931314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,0,1,fp8,fp8,0,2.444602648417155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,float16,0,2.3577067057291665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,fp8,0,2.3551573753356934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,0,1,fp8,fp8,0,2.1387573877970376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,float16,0,2.3671414057413735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,fp8,0,2.3738187154134116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,0,1,fp8,fp8,0,2.146735986073812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,float16,0,2.378453254699707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,fp8,0,2.377957344055176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,0,1,fp8,fp8,0,2.163381258646647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,float16,0,1.355061372121175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,fp8,0,1.3841813405354817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,0,1,fp8,fp8,0,1.2712213198343914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,float16,0,1.2261280218760173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,fp8,0,1.2272853056589763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,0,1,fp8,fp8,0,1.1221333344777424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,float16,0,1.2248213291168213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,fp8,0,1.2364746729532878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,0,1,fp8,fp8,0,1.1221973101298015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,float16,0,1.2373812993367512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,fp8,0,1.240885337193807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,float16,0,0.7201973597208658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,fp8,0,0.7339466412862142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,0,1,fp8,fp8,0,0.6779200236002604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,0,1,fp8,fp8,0,1.1278560161590576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,float16,0,0.6581013202667236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,fp8,0,0.6669066747029623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,0,1,fp8,fp8,0,0.6031466722488403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,float16,0,0.664250651995341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,fp8,0,0.6629226605097452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,0,1,fp8,fp8,0,0.6054506699244181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,float16,0,0.6632106701532999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,fp8,0,0.673098643620809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,0,1,fp8,fp8,0,0.6098293463389078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,float16,0,0.40715734163920086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,fp8,0,0.41314133008321124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,0,1,fp8,fp8,0,0.38733867804209393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,fp8,0,0.3797813256581624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,0,1,fp8,fp8,0,0.3471306562423706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,float16,0,0.3794666528701782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,fp8,0,0.38019732634226483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,0,1,fp8,fp8,0,0.3476320107777913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,float16,0,0.3727733294169108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,float16,0,0.3818560043970744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,fp8,0,0.38192001978556317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,0,1,fp8,fp8,0,0.35118401050567627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,fp8,0,0.25469332933425903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,0,1,fp8,fp8,0,0.23906666040420532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,float16,0,0.23173334201176962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,fp8,0,0.23218133052190146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,0,1,fp8,fp8,0,0.21732266743977866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,float16,0,0.23310933510462442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,float16,0,0.247706671555837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,fp8,0,0.23204267024993896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,0,1,fp8,fp8,0,0.21842666467030844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,float16,0,0.23326400915781656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,fp8,0,0.23617066939671835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,0,1,fp8,fp8,0,0.2201919953028361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,0,1,float16,float16,0,6.258815765380859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,0,1,float16,fp8,0,6.318074544270833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,0,1,fp8,fp8,0,5.627850850423177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,0,1,float16,float16,0,6.385311762491862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,0,1,float16,fp8,0,6.438373565673828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,0,1,fp8,fp8,0,5.653002421061198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,0,1,float16,float16,0,6.38435173034668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,0,1,float16,fp8,0,6.347344080607097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,0,1,fp8,fp8,0,5.699130376180013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,float16,0,3.5669066111246743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,fp8,0,3.5535093943277993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,0,1,fp8,fp8,0,3.2651360829671225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,float16,0,3.089930534362793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,fp8,0,3.0896854400634766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,0,1,fp8,fp8,0,2.813935915629069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,float16,0,3.1068480809529624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,fp8,0,3.1850452423095703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,0,1,fp8,fp8,0,2.827061335245768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,float16,0,3.125178654988607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,fp8,0,3.1521546045939126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,0,1,fp8,fp8,0,2.8551413218180337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,float16,0,1.801477273305257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,fp8,0,1.8170560201009114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,0,1,fp8,fp8,0,1.672976016998291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,float16,0,1.581914742787679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,fp8,0,1.5919946034749348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,0,1,fp8,fp8,0,1.443178653717041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,float16,0,1.5908746719360352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,fp8,0,1.5907786687215169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,0,1,fp8,fp8,0,1.4488159815470378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,float16,0,1.5977813402811687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,fp8,0,1.6110026041666667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,0,1,fp8,fp8,0,1.4627200762430828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,float16,0,0.931600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,fp8,0,0.9477706750233968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,0,1,fp8,fp8,0,0.8752053578694662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,float16,0,0.8287893136342367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,fp8,0,0.8374773661295573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,0,1,fp8,fp8,0,0.760095993677775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,float16,0,0.8372159798940023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,fp8,0,0.8375199635823568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,0,1,fp8,fp8,0,0.762389341990153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,float16,0,0.8368480205535889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,fp8,0,0.8477066357930502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,0,1,fp8,fp8,0,0.7685706615447998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,float16,0,0.50163733959198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,fp8,0,0.5098453362782797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,0,1,fp8,fp8,0,0.47227199872334796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,float16,0,0.4515093167622884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,fp8,0,0.45530664920806885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,0,1,fp8,fp8,0,0.41672531763712567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,float16,0,0.45667731761932373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,fp8,0,0.4563680092493693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,0,1,fp8,fp8,0,0.41648534933725995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,float16,0,0.45606935024261475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,fp8,0,0.4598879814147949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,0,1,fp8,fp8,0,0.4203893343607585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,float16,0,0.2869759996732076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,fp8,0,0.2923733393351237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,0,1,fp8,fp8,0,0.27262399593989056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,float16,0,0.2568746606508891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,fp8,0,0.2608319918314616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,0,1,fp8,fp8,0,0.24411199490229288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,float16,0,0.2655679980913798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,fp8,0,0.2618559996287028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,0,1,fp8,fp8,0,0.24515734116236368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,float16,0,0.2624906698862712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,fp8,0,0.2691359917322795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,0,1,fp8,fp8,0,0.24539732933044434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,float16,0,0.1805760065714518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,fp8,0,0.1817973256111145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,0,1,fp8,fp8,0,0.1732800006866455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,float16,0,0.16154666741689047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,0,1,fp8,fp8,0,0.1520639955997467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,float16,0,0.16110933820406595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,fp8,0,0.16313599546750387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,0,1,fp8,fp8,0,0.15237333377202353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,float16,0,0.16365866859753928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,fp8,0,0.16328000028928122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,0,1,fp8,fp8,0,0.15448533495267233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,fp8,0,0.16363733013470969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,0,1,float16,float16,0,6.89306640625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,0,1,float16,fp8,0,6.8215891520182295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,0,1,fp8,fp8,0,6.246917088826497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,0,1,float16,fp8,0,6.895631790161133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,0,1,float16,float16,0,6.801551818847656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,0,1,fp8,fp8,0,7.086175918579102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,0,1,float16,float16,0,6.969162623087565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,0,1,fp8,fp8,0,6.855786641438802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,0,1,float16,fp8,0,6.9826507568359375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,float16,0,4.0187733968098955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,fp8,0,3.9307359059651694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,float16,0,3.2694079081217446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,0,1,fp8,fp8,0,3.1284427642822266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,fp8,0,3.28331724802653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,float16,0,3.359583854675293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,fp8,0,3.3961973190307617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,0,1,fp8,fp8,0,3.5069119135538735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,float16,0,3.40068785349528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,fp8,0,3.472757339477539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,0,1,fp8,fp8,0,3.394773483276367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,float16,0,1.9597867329915364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,fp8,0,1.948021411895752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,0,1,fp8,fp8,0,1.9569867451985676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,float16,0,1.6499147415161133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,fp8,0,1.6495359738667805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,0,1,fp8,fp8,0,1.5775574048360188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,float16,0,1.6621492703755696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,fp8,0,1.6644585927327473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,0,1,fp8,fp8,0,1.6260159810384114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,float16,0,1.6818933486938477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,fp8,0,1.692255973815918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,0,1,fp8,fp8,0,1.623253345489502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,float16,0,0.9877386887868246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,fp8,0,0.9706026713053385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,0,1,fp8,fp8,0,0.9831733703613281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,float16,0,0.8422880172729492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,fp8,0,0.840399980545044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,float16,0,0.8461759885152181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,fp8,0,0.8470773696899414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,0,1,fp8,fp8,0,0.8084212938944498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,0,1,fp8,fp8,0,0.8046240011850992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,float16,0,0.860309362411499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,fp8,0,0.86189866065979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,0,1,fp8,fp8,0,0.823749303817749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,fp8,0,0.4997440179189046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,0,1,fp8,fp8,0,0.5049813191095988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,float16,0,0.436624010403951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,fp8,0,0.4372640053431193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,float16,0,0.5167626539866129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,0,1,fp8,fp8,0,0.4119040171305339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,float16,0,0.44272534052530926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,fp8,0,0.44006399313608807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,0,1,fp8,fp8,0,0.416373332341512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,float16,0,0.44539201259613037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,fp8,0,0.4476693471272786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,0,1,fp8,fp8,0,0.42510398228963214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,float16,0,0.27878399689992267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,fp8,0,0.26664533217748004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,0,1,fp8,fp8,0,0.26934399207433063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,float16,0,0.23334934314092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,fp8,0,0.23651200532913208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,float16,0,0.23627734184265137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,fp8,0,0.23881600300470987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,0,1,fp8,fp8,0,0.22316799561182657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,float16,0,0.23980265855789185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,fp8,0,0.24197334051132202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,0,1,fp8,fp8,0,0.22644799947738647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,float16,0,0.15363732973734537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,fp8,0,0.151119997104009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,0,1,fp8,fp8,0,0.14818666378657022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,float16,0,0.12818666299184164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,fp8,0,0.12963733077049255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,0,1,fp8,fp8,0,0.12014399965604146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,0,1,fp8,fp8,0,0.21737066904703775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,float16,0,0.1292586624622345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,fp8,0,0.12954666217168173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,0,1,fp8,fp8,0,0.1213759978612264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,float16,0,0.13089066743850708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,fp8,0,0.13149333000183105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,0,1,fp8,fp8,0,0.12460800011952718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,float16,0,0.09245333075523376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,fp8,0,0.08746666709582011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,0,1,fp8,fp8,0,0.09142933289210002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,float16,0,0.07679999868075053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,fp8,0,0.07884266475836436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,0,1,fp8,fp8,0,0.07026133437951405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,float16,0,0.07745600243409474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,fp8,0,0.07853333155314128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,0,1,fp8,fp8,0,0.07082133491834004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,float16,0,0.07851199805736542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,fp8,0,0.07769066592057546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,0,1,fp8,fp8,0,0.07178133229414622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,0,1,float16,float16,0,5.570261637369792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,0,1,float16,fp8,0,5.601392110188802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,0,1,fp8,fp8,0,5.319130579630534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,0,1,float16,float16,0,5.867008209228516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,0,1,float16,fp8,0,5.8744691212972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,0,1,fp8,fp8,0,6.180746714274089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,0,1,float16,float16,0,5.966789245605469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,0,1,float16,fp8,0,5.995082855224609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,128,0,1,float16,float16,0,3.5115254720052085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,0,1,fp8,fp8,0,5.97761599222819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,128,0,1,float16,fp8,0,3.3814398447672525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,0,1,float16,float16,0,2.788501421610514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,0,1,float16,fp8,0,2.784181276957194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,0,1,fp8,fp8,0,2.6713012059529624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,0,1,float16,float16,0,2.8539253870646157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,0,1,float16,fp8,0,2.856639862060547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,0,1,fp8,fp8,0,3.063114802042643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,0,1,float16,float16,0,2.9454774856567383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,0,1,float16,fp8,0,2.971680005391439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,0,1,fp8,fp8,0,2.909205436706543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,0,1,float16,float16,0,1.7057706514994304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,0,1,float16,fp8,0,1.7015520731608074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,0,1,fp8,fp8,0,1.7193279266357422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,0,1,float16,float16,0,1.4061493873596191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,0,1,float16,fp8,0,1.4055147171020508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,0,1,fp8,fp8,0,1.348954677581787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,0,1,float16,float16,0,1.4196693102518718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,0,1,float16,fp8,0,1.4264747301737468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,0,1,fp8,fp8,0,1.4293227195739746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,0,1,float16,float16,0,1.4384959538777669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,0,1,float16,fp8,0,1.441973368326823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,0,1,fp8,fp8,0,1.4149279594421387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,0,1,float16,float16,0,0.8557120164235433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,0,1,float16,fp8,0,0.8385279973347982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,0,1,fp8,fp8,0,0.865285317103068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,0,1,float16,float16,0,0.7187519868214926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,0,1,float16,fp8,0,0.7153600056966146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,0,1,fp8,fp8,0,0.6848533153533936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,0,1,float16,float16,0,0.7234240372975668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,0,1,float16,fp8,0,0.7235626379648844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,0,1,fp8,fp8,0,0.6939840316772461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,0,1,float16,float16,0,0.7318507035573324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,0,1,float16,fp8,0,0.7322826385498047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,0,1,fp8,fp8,0,0.7076799869537354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,0,1,float16,float16,0,0.4426826635996501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,0,1,float16,fp8,0,0.431717316309611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,0,1,fp8,fp8,0,0.445093313852946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,0,1,float16,float16,0,0.37191998958587646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,0,1,float16,fp8,0,0.3723466793696086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,0,1,fp8,fp8,0,0.3503040075302124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,0,1,float16,float16,0,0.37298667430877686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,0,1,fp8,fp8,0,0.3556319872538249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,0,1,float16,float16,0,0.3795679807662964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,0,1,float16,fp8,0,0.37918933232625324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,0,1,fp8,fp8,0,0.36510932445526123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,0,1,float16,float16,0,0.2369706630706787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,0,1,float16,fp8,0,0.3721706469853719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,0,1,fp8,fp8,0,0.23435733715693155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,0,1,float16,float16,0,0.1975733240445455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,0,1,float16,fp8,0,0.19962666432062784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,0,1,float16,fp8,0,0.23042666912078857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,0,1,fp8,fp8,0,0.18691732486089072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,0,1,float16,float16,0,0.19953600565592447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,0,1,float16,fp8,0,0.20012267430623373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,0,1,fp8,fp8,0,0.1897760033607483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,0,1,float16,float16,0,0.20175999402999878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,0,1,float16,fp8,0,0.20310932397842407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,0,1,fp8,fp8,0,0.19356799125671387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,0,1,float16,float16,0,0.13155200084050497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,0,1,float16,fp8,0,0.12813333670298258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,0,1,fp8,fp8,0,0.12963199615478516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,0,1,float16,float16,0,0.10748799641927083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,0,1,float16,fp8,0,0.10723732908566792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,0,1,fp8,fp8,0,0.10106666882832845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,0,1,float16,float16,0,0.10806933045387268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,0,1,float16,fp8,0,0.1086186667283376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,0,1,fp8,fp8,0,0.10248532891273499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,0,1,float16,float16,0,0.10877866546312968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,0,1,float16,fp8,0,0.11006933450698853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,0,1,fp8,fp8,0,0.10427733262379964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,0,1,float16,float16,0,0.07776533563931783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,0,1,float16,fp8,0,0.07448533177375793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,0,1,float16,float16,0,0.06613866488138835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,0,1,float16,fp8,0,0.06622399886449178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,0,1,fp8,fp8,0,0.06026133398214976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,0,1,float16,float16,0,0.06426133215427399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,0,1,float16,fp8,0,0.06477333108584087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,0,1,fp8,fp8,0,0.060319999853769936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,0,1,float16,float16,0,0.06477866570154826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,0,1,float16,fp8,0,0.06635733445485432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,0,1,fp8,fp8,0,0.060592000683148704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,0,1,fp8,fp8,0,0.07989866534868877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,0,1,float16,fp8,0,0.04618666569391886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,0,1,fp8,fp8,0,0.043807998299598694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,0,1,float16,float16,0,0.04445866743723551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,0,1,float16,fp8,0,0.04525866607824961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,0,1,fp8,fp8,0,0.0417546679576238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,0,1,float16,float16,0,0.04402133325735728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,0,1,float16,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,0,1,fp8,fp8,0,0.03999999910593033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,0,1,float16,float16,0,0.04409599800904592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,0,1,float16,fp8,0,0.04389866689840952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,0,1,fp8,fp8,0,0.04185600082079569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,0,1,float16,float16,0,0.04548799991607666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,0,1,float16,float16,0,2.5231359799702964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,0,1,float16,fp8,0,2.527162710825602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,0,1,fp8,fp8,0,2.419621308644613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,0,1,float16,float16,0,2.599280039469401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,0,1,float16,fp8,0,2.6358133951822915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,0,1,fp8,fp8,0,2.8375574747721353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,0,1,float16,float16,0,2.6620853741963706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,0,1,float16,fp8,0,2.713552157084147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,0,1,fp8,fp8,0,2.664602597554525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,0,1,float16,float16,0,1.5829493204752605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,0,1,fp8,fp8,0,1.6404266357421875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,0,1,float16,fp8,0,1.5267252922058105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,0,1,float16,float16,0,1.2722240289052327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,0,1,float16,fp8,0,1.275274674097697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,0,1,fp8,fp8,0,1.2187786897023518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,0,1,float16,float16,0,1.2824587027231853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,0,1,fp8,fp8,0,1.375050703684489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,0,1,float16,fp8,0,1.288533369700114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,0,1,float16,fp8,0,1.3034826914469402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,0,1,fp8,fp8,0,1.294986645380656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,0,1,float16,float16,0,0.7828160127003988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,0,1,float16,fp8,0,0.7708266576131185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,0,1,float16,float16,0,1.295514663060506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,0,1,float16,float16,0,0.649344007174174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,0,1,float16,fp8,0,0.6494773228963217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,0,1,fp8,fp8,0,0.6221173206965128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,0,1,fp8,fp8,0,0.8058613141377767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,0,1,float16,float16,0,0.6515466769536337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,0,1,float16,fp8,0,0.6545386711756388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,0,1,fp8,fp8,0,0.6278506517410278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,0,1,float16,float16,0,0.6617226600646973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,0,1,float16,fp8,0,0.6640479962031046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,0,1,fp8,fp8,0,0.6499520142873129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,0,1,float16,float16,0,0.40621864795684814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,0,1,float16,fp8,0,0.3957600196202596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,0,1,fp8,fp8,0,0.4130773146947225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,0,1,float16,float16,0,0.3386026620864868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,0,1,float16,fp8,0,0.33874134222666424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,0,1,fp8,fp8,0,0.3189919988314311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,0,1,float16,float16,0,0.3386186758677165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,0,1,float16,fp8,0,0.3403093417485555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,0,1,fp8,fp8,0,0.32390934228897095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,0,1,float16,float16,0,0.344101349512736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,0,1,float16,fp8,0,0.3447306553522746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,0,1,fp8,fp8,0,0.3311306635538737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,0,1,float16,float16,0,0.2188106576601664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,0,1,float16,fp8,0,0.21220799287160239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,0,1,fp8,fp8,0,0.21996267636617026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,0,1,float16,float16,0,0.18153599898020426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,0,1,float16,fp8,0,0.18145600954691568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,0,1,fp8,fp8,0,0.16992533206939697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,0,1,float16,float16,0,0.18226667245229086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,0,1,float16,fp8,0,0.181877334912618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,0,1,fp8,fp8,0,0.17319466670354208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,0,1,float16,float16,0,0.1851253310839335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,0,1,float16,fp8,0,0.18587199846903482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,0,1,fp8,fp8,0,0.17761067549387613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,0,1,float16,fp8,0,0.11973333358764648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,0,1,fp8,fp8,0,0.12214932839075725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,0,1,float16,float16,0,0.10102933645248413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,0,1,float16,fp8,0,0.10021866361300151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,0,1,fp8,fp8,0,0.09335466225941975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,0,1,float16,float16,0,0.09969600041707356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,0,1,float16,fp8,0,0.10170132915178935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,0,1,float16,float16,0,0.12246933579444885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,0,1,float16,float16,0,0.10194666186968486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,0,1,float16,fp8,0,0.1019040048122406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,0,1,fp8,fp8,0,0.09876799583435059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,0,1,float16,float16,0,0.07010133564472198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,0,1,float16,fp8,0,0.06742933392524719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,0,1,fp8,fp8,0,0.07272533575693767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,0,1,float16,float16,0,0.058287998040517174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,0,1,float16,fp8,0,0.05831466615200043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,0,1,fp8,fp8,0,0.09611200292905171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,0,1,fp8,fp8,0,0.05470933516820272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,0,1,float16,float16,0,0.05815466741720835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,0,1,float16,fp8,0,0.05829333265622457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,0,1,float16,float16,0,0.0583840012550354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,0,1,float16,fp8,0,0.05835733314355215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,0,1,fp8,fp8,0,0.05468800167242686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,0,1,float16,float16,0,0.04381866753101349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,0,1,float16,fp8,0,0.044154668847719826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,0,1,fp8,fp8,0,0.04192000130812327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,0,1,float16,float16,0,0.03984000037113825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,0,1,float16,fp8,0,0.039808000127474465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,0,1,fp8,fp8,0,0.038736000657081604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,0,1,float16,float16,0,0.040037333965301514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,0,1,float16,fp8,0,0.040250666439533234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,0,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,0,1,float16,float16,0,0.03982399900754293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,0,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,0,1,fp8,fp8,0,0.03798400113979975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,0,1,float16,float16,0,0.02899733434120814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,0,1,float16,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,0,1,fp8,fp8,0,0.02769600103298823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,0,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,0,1,float16,fp8,0,0.027098665634791057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,0,1,fp8,fp8,0,0.02497066557407379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,0,1,float16,float16,0,0.027269333600997925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,0,1,float16,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,0,1,fp8,fp8,0,0.026181332767009735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,0,1,float16,float16,0,0.027317332724730175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,0,1,float16,fp8,0,0.02754666656255722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,0,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,0,1,fp8,fp8,0,0.05377600093682607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,0,1,float16,float16,0,1.3970506985982258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,0,1,float16,fp8,0,1.3954025904337566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,0,1,fp8,fp8,0,1.3407519658406575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,0,1,float16,float16,0,1.412709395090739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,0,1,float16,fp8,0,1.4109172821044922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,0,1,fp8,fp8,0,1.5138400395711262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,0,1,float16,float16,0,1.4280799229939778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,0,1,float16,fp8,0,1.4295679728190105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,0,1,float16,float16,0,0.8468853632609049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,0,1,fp8,fp8,0,1.4094613393147786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,0,1,float16,fp8,0,0.8286506334940592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,0,1,fp8,fp8,0,0.8594933350880941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,0,1,float16,float16,0,0.706063985824585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,0,1,float16,fp8,0,0.705893357594808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,0,1,fp8,fp8,0,0.6757760047912598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,0,1,float16,float16,0,0.7132159868876139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,0,1,float16,fp8,0,0.7119306723276774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,0,1,fp8,fp8,0,0.6842453479766846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,0,1,float16,float16,0,0.7246560255686442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,0,1,float16,fp8,0,0.725488026936849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,0,1,fp8,fp8,0,0.6954452991485596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,0,1,float16,float16,0,0.43235735098520917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,0,1,float16,fp8,0,0.42318399747212726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,0,1,fp8,fp8,0,0.4395306507746379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,0,1,float16,float16,0,0.3642880121866862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,0,1,float16,fp8,0,0.36321600278218585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,0,1,fp8,fp8,0,0.345850666364034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,0,1,float16,float16,0,0.3670453230539958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,0,1,float16,fp8,0,0.36578667163848877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,0,1,fp8,fp8,0,0.3485066493352254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,0,1,float16,float16,0,0.3713333209355672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,0,1,float16,fp8,0,0.3729066848754883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,0,1,fp8,fp8,0,0.358298659324646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,0,1,float16,float16,0,0.22830933332443237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,0,1,float16,fp8,0,0.2227893273035685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,0,1,fp8,fp8,0,0.2306293249130249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,0,1,float16,float16,0,0.19184533754984537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,0,1,float16,fp8,0,0.19138665994008383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,0,1,fp8,fp8,0,0.18245333433151245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,0,1,float16,float16,0,0.19350399573644003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,0,1,float16,fp8,0,0.19407999515533447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,0,1,fp8,fp8,0,0.18434133132298788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,0,1,float16,float16,0,0.19473065932591757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,0,1,fp8,fp8,0,0.1881706714630127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,0,1,float16,float16,0,0.1241919994354248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,0,1,float16,fp8,0,0.12173333764076233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,0,1,fp8,fp8,0,0.12593066692352295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,0,1,float16,float16,0,0.10382399956385295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,0,1,float16,fp8,0,0.10359999537467957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,0,1,fp8,fp8,0,0.0974826713403066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,0,1,float16,float16,0,0.10565867026646932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,0,1,float16,fp8,0,0.10544533530871074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,0,1,fp8,fp8,0,0.09981333216031392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,0,1,float16,float16,0,0.10690133770306905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,0,1,float16,fp8,0,0.1053013304869334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,0,1,fp8,fp8,0,0.10322667161623637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,0,1,float16,float16,0,0.07113066812356313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,0,1,float16,fp8,0,0.0705813318490982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,0,1,fp8,fp8,0,0.07644799848397572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,0,1,float16,float16,0,0.06225066880385081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,0,1,float16,fp8,0,0.060191998879114784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,0,1,fp8,fp8,0,0.05605866511662801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,0,1,float16,float16,0,0.06198933223883311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,0,1,float16,fp8,0,0.06221333146095276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,0,1,fp8,fp8,0,0.05730666716893514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,0,1,float16,float16,0,0.06216000020503998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,0,1,float16,fp8,0,0.06180266539255778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,0,1,fp8,fp8,0,0.05671999851862589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,0,1,float16,float16,0,0.039664000272750854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,0,1,float16,fp8,0,0.0401706670721372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,0,1,fp8,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,0,1,float16,float16,0,0.03947199881076813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,0,1,float16,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,0,1,fp8,fp8,0,0.03597866743803024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,0,1,float16,float16,0,0.037503999968369804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,0,1,float16,fp8,0,0.037834666669368744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,0,1,fp8,fp8,0,0.03602133442958196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,0,1,float16,float16,0,0.039733332892258964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,0,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,0,1,fp8,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,0,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,0,1,float16,fp8,0,0.028069332242012024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,0,1,fp8,fp8,0,0.029296000798543293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,0,1,float16,float16,0,0.027658666173617046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,0,1,float16,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,0,1,fp8,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,0,1,float16,float16,0,0.02754666656255722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,0,1,float16,fp8,0,0.19685333967208862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,0,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,0,1,fp8,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,0,1,float16,float16,0,0.027221334477265675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,0,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,0,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,0,1,float16,float16,0,0.02144533395767212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,0,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,0,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,0,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,0,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,0,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,0,1,float16,float16,0,0.8851253191630045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,0,1,float16,fp8,0,0.8872000376383463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,0,1,fp8,fp8,0,0.8764266967773438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,0,1,float16,float16,0,0.8926400343577067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,0,1,float16,fp8,0,0.8962026437123617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,0,1,fp8,fp8,0,0.9075573285420736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,0,1,float16,float16,0,0.900719960530599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,0,1,float16,fp8,0,0.9022506872812907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,0,1,float16,float16,0,0.5223840077718099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,0,1,fp8,fp8,0,0.8994186719258627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,0,1,float16,fp8,0,0.5146666765213013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,0,1,fp8,fp8,0,0.5358613332112631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,0,1,float16,float16,0,0.4517226616541545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,0,1,float16,fp8,0,0.4521973530451457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,0,1,fp8,fp8,0,0.44420798619588214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,0,1,float16,float16,0,0.45317331949869794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,0,1,float16,fp8,0,0.45823466777801514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,0,1,fp8,fp8,0,0.4493013223012288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,0,1,float16,float16,0,0.4593973159790039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,0,1,float16,fp8,0,0.4601493279139201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,0,1,fp8,fp8,0,0.45401068528493244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,0,1,float16,float16,0,0.27397332588831586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,0,1,float16,fp8,0,0.26603732506434125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,0,1,fp8,fp8,0,0.2797440091768901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,0,1,float16,float16,0,0.23501867055892944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,0,1,float16,fp8,0,0.23626132806142172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,0,1,fp8,fp8,0,0.23092800378799438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,0,1,float16,float16,0,0.23674132426579794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,0,1,float16,fp8,0,0.23865600426991782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,0,1,fp8,fp8,0,0.23389865954717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,0,1,float16,float16,0,0.23889599243799844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,0,1,float16,fp8,0,0.2404586672782898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,0,1,fp8,fp8,0,0.23741867144902548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,0,1,float16,float16,0,0.14503467082977295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,0,1,float16,fp8,0,0.14330666263898215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,0,1,fp8,fp8,0,0.1509226659933726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,0,1,float16,float16,0,0.12742400169372559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,0,1,float16,fp8,0,0.1278986632823944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,0,1,float16,float16,0,0.1276800036430359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,0,1,float16,fp8,0,0.12683733304341635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,0,1,fp8,fp8,0,0.12386133273442586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,0,1,float16,float16,0,0.1279306709766388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,0,1,float16,fp8,0,0.1280586620171865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,0,1,fp8,fp8,0,0.12826133767763773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,0,1,float16,float16,0,0.08272000153859456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,0,1,fp8,fp8,0,0.1220853328704834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,0,1,float16,fp8,0,0.08082133531570435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,0,1,fp8,fp8,0,0.08875733613967896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,0,1,float16,float16,0,0.07251733541488647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,0,1,float16,fp8,0,0.07221866647402446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,0,1,fp8,fp8,0,0.06842666864395142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,0,1,float16,float16,0,0.07239466905593872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,0,1,float16,fp8,0,0.07025066514809926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,0,1,fp8,fp8,0,0.06864533325036366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,0,1,float16,float16,0,0.07206933200359344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,0,1,float16,fp8,0,0.07336000104745229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,0,1,fp8,fp8,0,0.06869866450627644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,0,1,float16,float16,0,0.04782933493455251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,0,1,float16,fp8,0,0.047882666190465294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,0,1,fp8,fp8,0,0.04580266773700714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,0,1,float16,float16,0,0.04529066880544027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,0,1,float16,fp8,0,0.044112001856168113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,0,1,fp8,fp8,0,0.04159999887148539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,0,1,float16,float16,0,0.04381333291530609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,0,1,float16,fp8,0,0.04381333291530609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,0,1,fp8,fp8,0,0.04197866717974345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,0,1,float16,float16,0,0.04580800235271454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,0,1,float16,fp8,0,0.045781334241231285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,0,1,fp8,fp8,0,0.04381333291530609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,0,1,float16,float16,0,0.030218665798505146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,0,1,float16,fp8,0,0.03148266673088074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,0,1,fp8,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,0,1,float16,float16,0,0.02934933453798294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,0,1,float16,fp8,0,0.029125332832336426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,0,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,0,1,float16,float16,0,0.029839999973773956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,0,1,float16,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,0,1,fp8,fp8,0,0.029152000943819683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,0,1,float16,float16,0,0.029264000554879505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,0,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,0,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,0,1,float16,float16,0,0.023525332411130268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,0,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,0,1,float16,float16,0,0.022986667851607006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,0,1,float16,fp8,0,0.02197866638501485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,0,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,0,1,float16,float16,0,0.021216000119845074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,0,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,0,1,float16,float16,0,0.021664001047611237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,0,1,float16,float16,0,0.018778666853904724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,0,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,0,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,0,1,float16,float16,0,0.6391520102818807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,0,1,float16,fp8,0,0.6408586502075195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,0,1,fp8,fp8,0,0.6635253429412842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,0,1,float16,float16,0,0.6406240065892538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,0,1,float16,fp8,0,0.6429973443349203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,0,1,fp8,fp8,0,0.6484906673431396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,0,1,float16,float16,0,0.6453280051549276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,0,1,float16,fp8,0,0.6435733238855997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,0,1,float16,float16,0,0.36398935317993164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,0,1,fp8,fp8,0,0.6530666748682658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,0,1,float16,fp8,0,0.3574346701304118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,0,1,fp8,fp8,0,0.3781013488769531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,0,1,float16,float16,0,0.32968000570933026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,0,1,float16,fp8,0,0.32957865794499713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,0,1,fp8,fp8,0,0.33060266574223834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,0,1,float16,float16,0,0.33061333497365314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,0,1,float16,fp8,0,0.3304319977760315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,0,1,fp8,fp8,0,0.33274134000142414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,0,1,float16,float16,0,0.3309813340504964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,0,1,float16,fp8,0,0.33221866687138873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,0,1,fp8,fp8,0,0.3362773259480794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,0,1,float16,float16,0,0.19021334250768027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,0,1,float16,fp8,0,0.1876266598701477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,0,1,fp8,fp8,0,0.19762667020161948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,0,1,float16,float16,0,0.17293866475423178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,0,1,float16,fp8,0,0.17383466164271036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,0,1,fp8,fp8,0,0.17223467429478964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,0,1,float16,float16,0,0.1753173271814982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,0,1,float16,fp8,0,0.1747679909070333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,0,1,fp8,fp8,0,0.17279465993245444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,0,1,float16,float16,0,0.17417599757512411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,0,1,float16,fp8,0,0.17483733097712198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,0,1,fp8,fp8,0,0.17552000284194946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,0,1,float16,fp8,0,0.1034399966398875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,0,1,fp8,fp8,0,0.11183466513951619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,0,1,float16,float16,0,0.09488000472386678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,0,1,float16,fp8,0,0.09458667039871216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,0,1,fp8,fp8,0,0.09294933080673218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,0,1,float16,float16,0,0.09574400385220845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,0,1,float16,fp8,0,0.09518399834632874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,0,1,fp8,fp8,0,0.09307199716567993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,0,1,float16,float16,0,0.09504533807436626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,0,1,float16,float16,0,0.10475732882817586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,0,1,float16,fp8,0,0.09494400024414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,0,1,fp8,fp8,0,0.09344533085823059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,0,1,float16,fp8,0,0.05879466732343038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,0,1,fp8,fp8,0,0.05881600081920624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,0,1,float16,float16,0,0.05657599866390228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,0,1,float16,fp8,0,0.05690133571624756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,0,1,fp8,fp8,0,0.054245332876841225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,0,1,float16,float16,0,0.05645333230495453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,0,1,float16,fp8,0,0.05670933425426483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,0,1,float16,float16,0,0.05793599784374237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,0,1,fp8,fp8,0,0.05504000186920166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,0,1,float16,float16,0,0.055914665261904396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,0,1,float16,fp8,0,0.056986664732297264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,0,1,fp8,fp8,0,0.055733333031336464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,0,1,float16,float16,0,0.03773866593837738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,0,1,fp8,fp8,0,0.0369759996732076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,0,1,float16,float16,0,0.03548266738653183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,0,1,float16,fp8,0,0.03585600107908249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,0,1,fp8,fp8,0,0.03610666592915853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,0,1,float16,float16,0,0.03588266670703888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,0,1,fp8,fp8,0,0.035829332967599235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,0,1,float16,float16,0,0.03569599986076355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,0,1,float16,fp8,0,0.03558400024970373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,0,1,float16,fp8,0,0.037445334096749626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,0,1,fp8,fp8,0,0.03608000030120214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,0,1,float16,float16,0,0.02518933266401291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,0,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,0,1,float16,float16,0,0.023599999646345775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,0,1,float16,fp8,0,0.024005333582560223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,0,1,fp8,fp8,0,0.023743999501069386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,0,1,float16,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,0,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,0,1,float16,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,0,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,0,1,float16,float16,0,0.01953599974513054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,0,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,0,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,0,1,fp8,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,0,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,0,1,fp8,fp8,0,0.01964266722400983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,0,1,float16,float16,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,0,1,float16,float16,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,0,1,float16,float16,0,0.5330773194630941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,0,1,float16,fp8,0,0.5315306584040324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,0,1,fp8,fp8,0,0.539242664972941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,0,1,float16,float16,0,0.5337706804275513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,0,1,float16,fp8,0,0.5352586507797241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,0,1,fp8,fp8,0,0.5400853157043457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,0,1,float16,float16,0,0.5357493162155151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,0,1,float16,fp8,0,0.5377013285954794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,0,1,float16,float16,0,0.2926933368047078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,0,1,fp8,fp8,0,0.5446879863739014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,0,1,float16,fp8,0,0.2902666727701823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,0,1,fp8,fp8,0,0.3020906647046407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,0,1,float16,fp8,0,0.27564799785614014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,0,1,fp8,fp8,0,0.2760213414827983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,0,1,float16,float16,0,0.2757866581281026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,0,1,float16,fp8,0,0.2754826744397481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,0,1,fp8,fp8,0,0.2787359952926636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,0,1,float16,float16,0,0.27692266305287677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,0,1,float16,float16,0,0.27457600831985474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,0,1,float16,fp8,0,0.2770880063374837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,0,1,fp8,fp8,0,0.28066666920979816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,0,1,float16,float16,0,0.15570666392644247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,0,1,float16,fp8,0,0.15413866440455118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,0,1,fp8,fp8,0,0.16251200437545776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,0,1,float16,float16,0,0.14630400141080221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,0,1,float16,fp8,0,0.14506133397420248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,0,1,fp8,fp8,0,0.1442453364531199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,0,1,float16,float16,0,0.1451359987258911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,0,1,float16,fp8,0,0.14642133315404257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,0,1,fp8,fp8,0,0.14430399735768637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,0,1,float16,float16,0,0.14656000336011252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,0,1,float16,fp8,0,0.14619200428326926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,0,1,fp8,fp8,0,0.1460586686929067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,0,1,float16,float16,0,0.08343467116355896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,0,1,float16,fp8,0,0.0843946635723114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,0,1,fp8,fp8,0,0.08463999629020691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,0,1,float16,float16,0,0.08274133503437042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,0,1,float16,fp8,0,0.0809333324432373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,0,1,float16,float16,0,0.08088533580303192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,0,1,float16,fp8,0,0.08204799890518188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,0,1,fp8,fp8,0,0.08058133224646251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,0,1,float16,float16,0,0.08252266546090443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,0,1,float16,fp8,0,0.08278400202592213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,0,1,fp8,fp8,0,0.08075200021266937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,0,1,float16,float16,0,0.049957334995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,0,1,float16,fp8,0,0.04995200037956238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,0,1,fp8,fp8,0,0.05007466673851013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,0,1,float16,float16,0,0.048112000028292336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,0,1,float16,fp8,0,0.04808000226815542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,0,1,fp8,fp8,0,0.04784533381462097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,0,1,float16,float16,0,0.04799466828505198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,0,1,float16,fp8,0,0.048026666045188904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,0,1,float16,float16,0,0.04816000163555145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,0,1,float16,fp8,0,0.04993066688378652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,0,1,fp8,fp8,0,0.04821333289146423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,0,1,float16,float16,0,0.03348266581694285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,0,1,float16,fp8,0,0.033770665526390076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,0,1,fp8,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,0,1,float16,float16,0,0.03169599920511246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,0,1,float16,fp8,0,0.031770666440327965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,0,1,fp8,fp8,0,0.03137599925200144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,0,1,fp8,fp8,0,0.047695999344189964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,0,1,float16,float16,0,0.0312266672650973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,0,1,float16,fp8,0,0.03242133309443792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,0,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,0,1,float16,float16,0,0.03150933235883713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,0,1,fp8,fp8,0,0.03130666663249334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,0,1,float16,float16,0,0.02271999915440877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,0,1,fp8,fp8,0,0.02347733328739802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,0,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,0,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,0,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,0,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,0,1,fp8,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,0,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,0,1,float16,fp8,0,0.02048533285657565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,0,1,float16,float16,0,0.018863999595244724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,0,1,float16,float16,0,0.019909333437681198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,0,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,0,1,float16,float16,0,0.0161920003592968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,0,0.45360533396402997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,0,1,fp8,fp8,0,0.08167466521263123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,0,0.45205334822336835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,1,128,0,1,fp8,fp8,0,0.4424906571706136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,0,0.4529973268508911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,0,0.4537280003229777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,4,128,0,1,fp8,fp8,0,0.4414986769358317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,0,0.4538079897562663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,0,1,float16,fp8,0,0.031178665657838184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,128,0,1,float16,float16,0,0.23270932833353677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,0,0.4527146816253662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,128,0,1,float16,fp8,0,0.23478933175404867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,96,128,0,1,fp8,fp8,0,0.23013333479563394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,0,0.23439999421437582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,0,0.2343519926071167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,1,128,0,1,fp8,fp8,0,0.22822399934132895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,0,0.23237866163253784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,96,8,128,0,1,fp8,fp8,0,0.4601653416951497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,0,0.23453332980473837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,4,128,0,1,fp8,fp8,0,0.22839999198913574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,0,0.23412267367045084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,0,0.23388266563415527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,96,8,128,0,1,fp8,fp8,0,0.22897066672643027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,128,0,1,float16,fp8,0,0.12411733468373616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,128,0,1,fp8,fp8,0,0.12187199791272481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,0,0.12422933181126912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,0,0.12390399972597758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,1,128,0,1,fp8,fp8,0,0.12174399693806966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,0,0.12449066837628682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,0,0.12383466958999634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,4,128,0,1,fp8,fp8,0,0.12176533540089925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,0,0.12390399972597758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,128,0,1,fp8,fp8,0,0.12172800302505493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,128,0,1,float16,float16,0,0.07050666709740956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,128,0,1,float16,fp8,0,0.07064533233642578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,96,128,0,1,fp8,fp8,0,0.06889066596825917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,0,0.07017066578070323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,0,0.07061333457628886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,1,128,0,1,fp8,fp8,0,0.06865066786607106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,0,0.12390933434168498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,0,0.07020799815654755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,0,0.07054399947325389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,96,96,128,0,1,float16,float16,0,0.123690664768219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,0,0.07178666690985362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,128,0,1,fp8,fp8,0,0.06850666801134746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,128,0,1,float16,float16,0,0.04372799893220266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,128,0,1,float16,fp8,0,0.04225599765777588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,96,128,0,1,fp8,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,0,0.04394133388996124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,0,0.043568000197410583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,0,0.04404800136884054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,0,0.0711359977722168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,128,0,1,fp8,fp8,0,0.04152533411979675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,0,0.04205333193143209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,0,0.04350399971008301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,8,128,0,1,fp8,fp8,0,0.04385066529115041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,96,4,128,0,1,fp8,fp8,0,0.0684746652841568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,1,128,0,1,fp8,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,128,0,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,128,0,1,fp8,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,0,0.02978666623433431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,128,0,1,fp8,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,0,0.04433066646258036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,128,0,1,fp8,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,0,0.029685333371162415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,96,128,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,128,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,128,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,96,128,0,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,0,0.029493334392706554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,0,0.02162666618824005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,128,0,1,fp8,fp8,0,0.022511998812357586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,4,128,0,1,fp8,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,8,128,0,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,128,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,128,0,1,float16,fp8,0,0.0199946661790212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,96,128,0,1,fp8,fp8,0,0.019600000232458115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,1,128,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,4,128,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,96,8,128,0,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,96,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,0,0.029658667743206024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,0,0.01581866666674614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,1,128,0,1,fp8,fp8,0,0.015813333292802174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,128,0,1,fp8,fp8,0,0.016000000139077503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,96,8,128,0,1,fp8,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,0,1,fp8,fp8,0,22.080907185872395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,float16,0,31.839263916015625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,float16,0,32.09570058186849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,fp8,0,31.32483164469401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,0,1,fp8,fp8,0,22.68126932779948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,fp8,0,29.490806579589844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,float16,0,30.94957987467448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,fp8,0,32.141588846842446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,0,1,fp8,fp8,0,22.59881083170573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,float16,0,30.19935353597005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,float16,0,16.287376403808594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,fp8,0,30.860794067382812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,0,1,fp8,fp8,0,22.826192220052082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,fp8,0,15.186293284098307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,0,1,fp8,fp8,0,11.81765874226888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,float16,0,14.723888397216797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,fp8,0,15.560955047607422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,0,1,fp8,fp8,0,11.240735371907553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,float16,0,15.854836781819662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,0,1,fp8,fp8,0,11.456347147623697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,fp8,0,15.20803705851237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,float16,0,14.691903432210287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,0,1,fp8,fp8,0,11.523780822753906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,fp8,0,15.39245859781901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,float16,0,15.034372965494791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,fp8,0,14.532341003417969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,float16,0,8.259466807047525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,0,1,fp8,fp8,0,11.625466664632162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,fp8,0,7.78164800008138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,0,1,fp8,fp8,0,6.016645431518555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,float16,0,7.752944310506185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,fp8,0,7.946608225504558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,0,1,fp8,fp8,0,5.804128011067708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,float16,0,7.195152282714844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,0,1,fp8,fp8,0,5.981573104858398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,fp8,0,7.891914367675781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,float16,0,7.469290415445964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,fp8,0,7.335791905721028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,0,1,fp8,fp8,0,5.887418746948242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,fp8,0,7.460271835327148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,float16,0,4.060954729715983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,0,1,fp8,fp8,0,5.831040064493815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,float16,0,7.712277094523112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,fp8,0,3.9387734731038413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,0,1,fp8,fp8,0,3.2311731974283853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,float16,0,3.6952266693115234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,fp8,0,3.6702613830566406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,0,1,fp8,fp8,0,3.1398560206095376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,float16,0,3.714421272277832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,fp8,0,3.7237812678019204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,0,1,fp8,fp8,0,3.1301066080729165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,float16,0,3.656010627746582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,fp8,0,3.680474599202474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,0,1,fp8,fp8,0,3.127786636352539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,float16,0,3.687679926554362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,fp8,0,3.750725428263346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,0,1,fp8,fp8,0,3.1390132904052734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,0,1,fp8,fp8,0,13.307621002197266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,float16,0,16.92745081583659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,fp8,0,17.810293833414715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,float16,0,17.781504313151043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,0,1,fp8,fp8,0,13.336116790771484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,fp8,0,17.624186197916668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,float16,0,17.892650604248047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,fp8,0,17.622907002766926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,0,1,fp8,fp8,0,13.335567474365234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,float16,0,17.199386596679688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,0,1,fp8,fp8,0,13.6288693745931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,float16,0,9.118661244710287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,fp8,0,18.099610646565754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,fp8,0,9.155642827351889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,float16,0,8.504021326700846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,0,1,fp8,fp8,0,7.210378646850586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,fp8,0,8.950597127278646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,0,1,fp8,fp8,0,6.72874641418457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,float16,0,8.404021581013998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,0,1,fp8,fp8,0,6.708506902058919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,fp8,0,8.701685587565104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,float16,0,8.739797592163086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,0,1,fp8,fp8,0,6.895402908325195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,fp8,0,8.35425059000651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,float16,0,8.632869084676107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,float16,0,4.429397265116374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,fp8,0,9.090911865234375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,fp8,0,4.716128031412761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,0,1,fp8,fp8,0,3.672421455383301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,float16,0,4.303018569946289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,0,1,fp8,fp8,0,6.874325434366862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,fp8,0,4.6753387451171875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,0,1,fp8,fp8,0,3.514981269836426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,float16,0,4.293504079182942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,0,1,fp8,fp8,0,3.5882186889648438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,float16,0,4.104511896769206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,fp8,0,4.233194669087728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,fp8,0,4.185402552286784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,0,1,fp8,fp8,0,3.5230185190836587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,float16,0,4.0957441329956055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,float16,0,2.2280960083007812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,fp8,0,4.165013313293457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,0,1,fp8,fp8,0,3.542634646097819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,fp8,0,2.2258826891581216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,float16,0,2.1695146560668945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,fp8,0,2.1660106976826987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,0,1,fp8,fp8,0,1.934655984242757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,float16,0,2.166656017303467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,fp8,0,2.1129867235819497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,0,1,fp8,fp8,0,1.923360029856364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,0,1,fp8,fp8,0,1.9937866528828938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,float16,0,2.14084259668986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,fp8,0,2.1618399620056152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,0,1,fp8,fp8,0,1.9334079424540203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,float16,0,2.152538617451986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,fp8,0,2.254101276397705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,0,1,fp8,fp8,0,1.9331413904825847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,0,1,fp8,fp8,0,9.600650787353516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,float16,0,12.3428586324056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,fp8,0,11.671408335367838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,float16,0,11.814144134521484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,0,1,fp8,fp8,0,10.006133397420248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,fp8,0,13.589188893636068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,float16,0,11.724624633789062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,fp8,0,12.188128153483072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,0,1,fp8,fp8,0,9.752858479817709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,float16,0,12.696314493815104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,float16,0,6.93673578898112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,fp8,0,11.818101247151693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,0,1,fp8,fp8,0,9.72003173828125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,fp8,0,6.4737122853597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,0,1,fp8,fp8,0,5.240021387736003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,float16,0,6.175472259521484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,fp8,0,6.3336747487386065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,0,1,fp8,fp8,0,4.932970682779948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,float16,0,6.461941401163737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,fp8,0,6.13426144917806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,0,1,fp8,fp8,0,4.885253270467122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,float16,0,6.401509602864583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,0,1,fp8,fp8,0,4.879578590393066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,fp8,0,6.499573389689128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,float16,0,6.162805557250977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,float16,0,3.2769600550333657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,fp8,0,6.461013158162435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,0,1,fp8,fp8,0,4.894277254740397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,fp8,0,3.1248693466186523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,float16,0,3.020064036051432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,fp8,0,2.8579413096110025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,0,1,fp8,fp8,0,2.57314666112264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,0,1,fp8,fp8,0,2.7037973403930664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,float16,0,3.162970542907715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,fp8,0,2.9953174591064453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,0,1,fp8,fp8,0,2.568725268046061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,float16,0,3.036346753438314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,0,1,fp8,fp8,0,2.580901304880778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,fp8,0,3.2486613591512046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,float16,0,3.064922650655111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,float16,0,1.875040054321289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,0,1,fp8,fp8,0,2.6004799207051597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,fp8,0,1.6593440373738606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,0,1,fp8,fp8,0,1.486037254333496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,float16,0,1.5564746856689453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,fp8,0,1.552293300628662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,fp8,0,3.0656585693359375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,0,1,fp8,fp8,0,1.43284273147583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,float16,0,1.5570346514383953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,fp8,0,1.5588480631510417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,0,1,fp8,fp8,0,1.4293333689371746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,float16,0,1.576266606648763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,fp8,0,1.5638186136881511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,0,1,fp8,fp8,0,1.4209920565287273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,float16,0,1.5633813540140789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,fp8,0,1.5998986562093098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,0,1,fp8,fp8,0,1.433253288269043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,0,1,fp8,fp8,0,12.855845133463541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,float16,0,16.389109293619793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,fp8,0,16.678704579671223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,float16,0,15.78231430053711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,0,1,fp8,fp8,0,12.950725555419922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,fp8,0,18.036837259928387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,float16,0,17.134591420491535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,fp8,0,16.560042063395183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,0,1,fp8,fp8,0,12.967535654703775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,float16,0,17.302351633707683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,0,1,fp8,fp8,0,12.8450075785319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,fp8,0,16.994144439697266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,float16,0,9.011280059814453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,0,1,fp8,fp8,0,6.86355717976888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,fp8,0,9.242352167765299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,float16,0,8.746623992919922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,fp8,0,8.322394688924154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,0,1,fp8,fp8,0,6.43173344930013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,float16,0,8.340213139851889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,0,1,fp8,fp8,0,6.488128026326497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,fp8,0,8.58354123433431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,float16,0,7.98422368367513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,0,1,fp8,fp8,0,6.46171760559082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,fp8,0,8.396746953328451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,float16,0,8.481818517049154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,float16,0,4.491557439168294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,fp8,0,8.178677241007486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,0,1,fp8,fp8,0,6.4755096435546875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,fp8,0,4.645333290100098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,0,1,fp8,fp8,0,3.5867894490559897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,float16,0,3.9678773880004883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,fp8,0,4.2126773198445635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,0,1,fp8,fp8,0,3.329530715942383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,float16,0,4.142549196879069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,0,1,fp8,fp8,0,3.3234666188557944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,float16,0,4.2017866770426435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,fp8,0,4.00984541575114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,fp8,0,4.235871950785319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,0,1,fp8,fp8,0,3.332922617594401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,float16,0,4.022816022237142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,0,1,fp8,fp8,0,3.344757397969564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,float16,0,2.127445379892985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,0,1,fp8,fp8,0,1.8963252703348796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,fp8,0,2.216357390085856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,fp8,0,3.8739681243896484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,float16,0,1.996389389038086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,fp8,0,1.9614027341206868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,float16,0,1.9773653348286946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,fp8,0,2.0458079973856607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,0,1,fp8,fp8,0,1.7809653282165527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,float16,0,1.9727892875671387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,0,1,fp8,fp8,0,1.776693344116211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,fp8,0,2.0145653088887534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,float16,0,1.9487999280293782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,fp8,0,1.9851627349853516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,0,1,fp8,fp8,0,1.844282627105713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,float16,0,1.1139039993286133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,fp8,0,1.2218879858652751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,0,1,fp8,fp8,0,1.0494986375172932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,float16,0,1.0867626667022705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,fp8,0,1.1181440353393555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,0,1,fp8,fp8,0,1.0034186840057373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,float16,0,1.0857280095418294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,fp8,0,1.0948053201039631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,0,1,fp8,fp8,0,1.7814453442891438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,float16,0,1.0836533705393474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,fp8,0,1.0944639841715496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,0,1,fp8,fp8,0,1.003882646560669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,float16,0,1.0830132961273193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,fp8,0,1.1035626729329426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,0,1,fp8,fp8,0,1.0043093363444011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,0,1,fp8,fp8,0,1.0077653725941975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,0,1,fp8,fp8,0,7.799130757649739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,fp8,0,9.640207926432291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,float16,0,9.419845581054688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,float16,0,9.983482360839844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,0,1,fp8,fp8,0,7.86190923055013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,fp8,0,9.7204958597819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,float16,0,9.405322392781576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,fp8,0,9.900933583577475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,0,1,fp8,fp8,0,8.024858474731445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,float16,0,10.438202540079752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,float16,0,5.497989018758138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,0,1,fp8,fp8,0,8.057461420694986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,fp8,0,5.350778579711914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,fp8,0,9.541599909464518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,0,1,fp8,fp8,0,4.302048047383626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,float16,0,4.874592145284017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,fp8,0,4.7335465749104815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,0,1,fp8,fp8,0,3.9594027201334634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,float16,0,4.963349342346191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,fp8,0,5.016965230305989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,0,1,fp8,fp8,0,3.9629653294881186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,float16,0,4.740997314453125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,fp8,0,4.861520131429036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,0,1,fp8,fp8,0,3.9678560892740884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,float16,0,4.807045300801595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,fp8,0,4.6998294194539385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,float16,0,2.5327626864115396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,fp8,0,2.5352320671081543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,0,1,fp8,fp8,0,2.2236480712890625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,float16,0,2.344634691874186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,fp8,0,2.313663959503174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,0,1,fp8,fp8,0,2.0845120747884116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,0,1,fp8,fp8,0,3.982826550801595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,float16,0,2.449199994405111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,fp8,0,2.362799962361654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,float16,0,2.2963573137919107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,fp8,0,2.445829391479492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,0,1,fp8,fp8,0,2.089066664377848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,float16,0,2.404810587565104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,fp8,0,2.396970589955648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,0,1,fp8,fp8,0,2.076629320780436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,float16,0,1.319434642791748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,fp8,0,1.315119981765747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,0,1,fp8,fp8,0,1.1969172954559326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,float16,0,1.2853973706563313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,0,1,fp8,fp8,0,2.0616000493367515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,0,1,fp8,fp8,0,1.120298703511556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,float16,0,1.2547893524169922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,fp8,0,1.2196853160858154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,0,1,fp8,fp8,0,1.1219200293223064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,float16,0,1.2374186515808105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,fp8,0,1.2274826367696126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,0,1,fp8,fp8,0,1.1247466405232747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,fp8,0,1.2210400104522705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,float16,0,1.2371573448181152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,fp8,0,1.2191946506500244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,0,1,fp8,fp8,0,1.1259893576304119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,float16,0,0.728762706120809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,fp8,0,0.7458986441294352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,0,1,fp8,fp8,0,0.6842026710510254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,float16,0,0.6960533459981283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,fp8,0,0.6994506518046061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,0,1,fp8,fp8,0,0.64847465356191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,float16,0,0.6965546607971191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,fp8,0,0.6982560157775879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,0,1,fp8,fp8,0,0.6483786503473917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,float16,0,0.6966613133748373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,fp8,0,0.7049386501312256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,float16,0,0.7003733317057291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,fp8,0,0.7054346402486166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,0,1,fp8,fp8,0,0.6543200016021729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,0,1,fp8,fp8,0,0.6507840156555176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,float16,0,9.772709528605143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,fp8,0,9.498448053995768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,0,1,fp8,fp8,0,7.984048207600911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,float16,0,9.480031967163086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,0,1,fp8,fp8,0,8.021535873413086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,fp8,0,10.062378565470377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,float16,0,9.551034927368164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,fp8,0,9.575786590576172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,0,1,fp8,fp8,0,8.031797409057617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,float16,0,10.277488072713217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,fp8,0,10.490234375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,0,1,fp8,fp8,0,8.081658681233725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,float16,0,5.20140266418457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,fp8,0,5.5613759358723955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,0,1,fp8,fp8,0,4.424730618794759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,float16,0,4.692314783732097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,fp8,0,4.653541247049968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,0,1,fp8,fp8,0,4.000469207763672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,float16,0,4.644197463989258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,fp8,0,4.72328535715739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,0,1,fp8,fp8,0,4.004495938618978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,float16,0,4.892389297485352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,0,1,fp8,fp8,0,4.019856135050456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,float16,0,4.74184004465739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,fp8,0,4.928874651590983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,fp8,0,4.74289608001709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,fp8,0,2.4926719665527344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,0,1,fp8,fp8,0,2.2720905939737954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,float16,0,2.551525274912516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,0,1,fp8,fp8,0,4.042330741882324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,float16,0,2.277525266011556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,fp8,0,2.303429285685221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,0,1,fp8,fp8,0,2.0558932622273765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,float16,0,2.285792032877604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,fp8,0,2.3304479916890464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,0,1,fp8,fp8,0,2.07041597366333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,float16,0,2.3314132690429688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,fp8,0,2.3115199406941733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,0,1,fp8,fp8,0,2.06494935353597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,float16,0,2.359562714894613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,fp8,0,2.357418696085612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,0,1,fp8,fp8,0,2.0766026178995767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,float16,0,1.2866079807281494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,fp8,0,1.303872028986613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,0,1,fp8,fp8,0,1.1837546825408936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,float16,0,1.180741310119629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,fp8,0,1.1979200045267742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,0,1,fp8,fp8,0,1.090725342432658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,float16,0,1.1855573654174805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,fp8,0,1.1890613238016765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,0,1,fp8,fp8,0,1.0900746981302898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,float16,0,1.1911413669586182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,fp8,0,1.1982826391855876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,0,1,fp8,fp8,0,1.0914133389790852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,float16,0,1.2023786703745525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,fp8,0,1.2009173234303792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,float16,0,0.6950399875640869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,fp8,0,0.7139999866485596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,0,1,fp8,fp8,0,0.6566346486409506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,float16,0,0.651253342628479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,fp8,0,0.6628746589024862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,0,1,fp8,fp8,0,0.606714685757955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,float16,0,0.6506346861521403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,fp8,0,0.6593493223190308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,0,1,fp8,fp8,0,0.607317328453064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,float16,0,0.6545493205388387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,fp8,0,0.6642453273137411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,float16,0,0.6567946672439575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,fp8,0,0.6648106575012207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,0,1,fp8,fp8,0,0.6108373403549194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,float16,0,0.41202131907145184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,fp8,0,0.4142133394877116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,0,1,fp8,fp8,0,0.38998401165008545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,float16,0,0.38628800710042316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,fp8,0,0.391482671101888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,0,1,fp8,fp8,0,0.3643626769383748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,0,1,fp8,fp8,0,0.6091946760813395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,float16,0,0.3898080190022786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,fp8,0,0.3854986826578776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,0,1,fp8,fp8,0,0.3647679885228475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,float16,0,0.38976001739501953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,fp8,0,0.38839999834696454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,0,1,fp8,fp8,0,0.3659093379974365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,float16,0,0.3956480026245117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,fp8,0,0.39019731680552167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,0,1,fp8,fp8,0,0.36763731638590497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,0,1,fp8,fp8,0,1.0991840362548828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,0,1,fp8,fp8,0,5.0687252680460615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,float16,0,6.037818908691406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,fp8,0,5.734282811482747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,float16,0,5.794576009114583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,0,1,fp8,fp8,0,5.08626651763916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,fp8,0,5.933343887329102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,float16,0,5.749029159545898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,fp8,0,5.733450571695964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,0,1,fp8,fp8,0,5.097781181335449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,float16,0,6.039834976196289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,fp8,0,5.977280298868815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,float16,0,3.1724160512288413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,0,1,fp8,fp8,0,5.138997395833333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,0,1,fp8,fp8,0,2.8700478871663413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,float16,0,2.928677241007487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,0,1,fp8,fp8,0,2.5556960105895996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,fp8,0,3.305349349975586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,float16,0,2.8882614771525064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,fp8,0,2.875098546346029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,0,1,fp8,fp8,0,2.558560053507487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,fp8,0,2.839125315348307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,float16,0,3.014341354370117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,fp8,0,2.9468533198038735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,float16,0,2.903066635131836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,fp8,0,3.0003201166788735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,float16,0,1.5793973604838054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,0,1,fp8,fp8,0,2.589893341064453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,0,1,fp8,fp8,0,2.5697174072265625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,fp8,0,1.6630934079488118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,0,1,fp8,fp8,0,1.5027306874593098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,float16,0,1.4398187001546223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,fp8,0,1.4443146387736003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,0,1,fp8,fp8,0,1.3737653096516926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,float16,0,1.4401812553405762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,fp8,0,1.4646612803141277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,0,1,fp8,fp8,0,1.327877362569173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,float16,0,1.4574186007181804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,fp8,0,1.4555999437967937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,0,1,fp8,fp8,0,1.336143970489502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,float16,0,1.4594346682230632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,fp8,0,1.4736266136169434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,0,1,fp8,fp8,0,1.3420906066894531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,fp8,0,0.8803359667460123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,0,1,fp8,fp8,0,0.8293173313140869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,float16,0,0.7687466939290365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,fp8,0,0.7716426849365234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,0,1,fp8,fp8,0,0.7119306723276774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,float16,0,0.7694453398386637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,fp8,0,0.7740373611450195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,0,1,fp8,fp8,0,0.7133386929829916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,float16,0,0.7707146803538004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,fp8,0,0.7791786988576254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,float16,0,0.7741920153299967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,fp8,0,0.779317299524943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,0,1,fp8,fp8,0,0.717525323232015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,float16,0,0.4655413230260213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,fp8,0,0.47301332155863446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,0,1,fp8,fp8,0,0.44200531641642254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,float16,0,0.8363946278889974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,fp8,0,0.4331893523534139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,0,1,fp8,fp8,0,0.7149013678232828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,float16,0,0.43622398376464844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,fp8,0,0.43958401679992676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,0,1,fp8,fp8,0,0.40591998895009357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,float16,0,0.43951467672983807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,fp8,0,0.4382293224334717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,0,1,fp8,fp8,0,0.4065973361333211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,float16,0,0.43886399269104004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,float16,0,0.4338293472925822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,0,1,fp8,fp8,0,0.4089333216349284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,fp8,0,0.28726933399836224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,0,1,fp8,fp8,0,0.2696106632550557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,float16,0,0.2603360017140706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,fp8,0,0.2606400052706401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,0,1,fp8,fp8,0,0.24663466215133667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,float16,0,0.2602506677309672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,fp8,0,0.26016000906626385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,0,1,fp8,fp8,0,0.2481173276901245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,float16,0,0.2619360089302063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,fp8,0,0.26291199525197345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,0,1,fp8,fp8,0,0.2506293257077535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,float16,0,0.2642666697502136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,fp8,0,0.2642666697502136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,0,1,fp8,fp8,0,0.252074658870697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,fp8,0,0.44254934787750244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,float16,0,0.28327999512354535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,0,1,fp8,fp8,0,0.4044906695683797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,float16,0,6.411151885986328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,0,1,fp8,fp8,0,5.5404103597005205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,fp8,0,6.213834762573242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,float16,0,6.170693079630534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,fp8,0,6.244874954223633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,0,1,fp8,fp8,0,5.566405614217122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,float16,0,6.362026850382487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,fp8,0,6.255349477132161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,float16,0,6.168517430623372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,0,1,fp8,fp8,0,5.574991861979167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,float16,0,3.426015853881836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,0,1,fp8,fp8,0,5.65666135152181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,fp8,0,3.421754519144694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,0,1,fp8,fp8,0,3.178330739339193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,float16,0,3.009002685546875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,fp8,0,6.343605041503906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,fp8,0,3.0471038818359375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,0,1,fp8,fp8,0,2.765381177266439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,float16,0,3.07862917582194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,fp8,0,3.0858240127563477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,0,1,fp8,fp8,0,2.773514747619629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,float16,0,3.0886878967285156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,fp8,0,3.1020212173461914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,0,1,fp8,fp8,0,2.7932373682657876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,float16,0,3.1570825576782227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,fp8,0,3.0784212748209634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,float16,0,1.721135934193929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,0,1,fp8,fp8,0,2.8113279342651367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,fp8,0,1.760309378306071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,0,1,fp8,fp8,0,1.6185973485310872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,float16,0,1.527359962463379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,fp8,0,1.5389013290405273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,0,1,fp8,fp8,0,1.4122400283813477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,float16,0,1.5309386253356934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,fp8,0,1.537450631459554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,float16,0,1.549909273783366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,fp8,0,1.5439947446187336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,float16,0,1.5597012837727864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,fp8,0,1.5550026893615723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,float16,0,0.8897919654846191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,0,1,fp8,fp8,0,1.4251999855041504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,fp8,0,0.9114080270131429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,0,1,fp8,fp8,0,0.8500426610310873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,0,1,fp8,fp8,0,1.4160159428914387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,fp8,0,0.8074613412221273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,0,1,fp8,fp8,0,0.7424906889597574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,float16,0,0.8009386857350668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,fp8,0,0.804154634475708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,0,1,fp8,fp8,0,0.7461333274841309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,float16,0,0.8072906335194906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,float16,0,0.7987306912740072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,fp8,0,0.8126773039499918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,0,1,fp8,fp8,0,0.7709600130716959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,0,1,fp8,fp8,0,1.437071959177653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,fp8,0,0.8164213498433431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,float16,0,0.4910879929860433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,fp8,0,0.49215467770894367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,0,1,fp8,fp8,0,0.4578293164571126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,float16,0,0.43674667676289874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,fp8,0,0.44143466154734295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,0,1,fp8,fp8,0,0.4087040026982625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,float16,0,0.4408373435338338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,fp8,0,0.4383093516031901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,0,1,fp8,fp8,0,0.40745067596435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,float16,0,0.4394293228785197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,0,1,fp8,fp8,0,0.7561919689178467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,fp8,0,0.44572265942891437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,0,1,fp8,fp8,0,0.41019733746846515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,float16,0,0.4500266710917155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,fp8,0,0.44762667020161945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,0,1,fp8,fp8,0,0.412992000579834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,fp8,0,0.2844480077425639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,0,1,fp8,fp8,0,0.2659253279368083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,float16,0,0.24979199965794882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,fp8,0,0.25038933753967285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,0,1,fp8,fp8,0,0.239247997601827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,float16,0,0.2488373319307963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,fp8,0,0.2514773408571879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,0,1,fp8,fp8,0,0.2381920019785563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,float16,0,0.252293328444163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,fp8,0,0.2529226740201314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,0,1,fp8,fp8,0,0.2394239902496338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,float16,0,0.2556106646855672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,fp8,0,0.25758399566014606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,0,1,fp8,fp8,0,0.24272000789642334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,float16,0,0.17390932639439902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,fp8,0,0.17796266078948975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,0,1,fp8,fp8,0,0.16797866423924765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,float16,0,0.1574666698773702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,fp8,0,0.158053328593572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,0,1,fp8,fp8,0,0.15059733390808105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,float16,0,0.15993600090344748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,fp8,0,0.15741866827011108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,0,1,fp8,fp8,0,0.1507200002670288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,float16,0,0.1565546691417694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,fp8,0,0.16025066375732422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,0,1,fp8,fp8,0,0.1488106648127238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,float16,0,0.15949333707491556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,fp8,0,0.15773333112398782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,0,1,fp8,fp8,0,0.1521440049012502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,float16,0,0.8176480134328207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,float16,0,0.27827199300130206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,float16,0,4.007439931233724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,0,1,fp8,fp8,0,3.7091894149780273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,float16,0,4.0161387125651045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,fp8,0,4.09340254465739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,0,1,fp8,fp8,0,3.7332213719685874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,float16,0,4.043253262837728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,fp8,0,4.035269419352214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,fp8,0,4.153951962788899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,0,1,fp8,fp8,0,3.747584025065104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,fp8,0,4.083973248799642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,0,1,fp8,fp8,0,3.7808052698771157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,float16,0,2.3056747118631997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,fp8,0,2.3391572634379068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,float16,0,4.091386795043945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,float16,0,1.9971466064453125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,fp8,0,2.0024800300598145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,0,1,fp8,fp8,0,2.1612906455993652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,0,1,fp8,fp8,0,1.854981263478597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,float16,0,2.004197279612223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,fp8,0,2.0302720069885254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,0,1,fp8,fp8,0,1.8621546427408855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,float16,0,2.0175040562947593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,fp8,0,2.039893309275309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,0,1,fp8,fp8,0,1.8694400787353516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,float16,0,2.0292372703552246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,fp8,0,2.04473606745402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,float16,0,1.1723146438598633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,0,1,fp8,fp8,0,1.8884266217549641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,0,1,fp8,fp8,0,1.1103359858194988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,float16,0,1.026863972345988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,fp8,0,1.039632002512614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,0,1,fp8,fp8,0,0.9548319975535074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,float16,0,1.0294773578643799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,fp8,0,1.0419200261433919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,0,1,fp8,fp8,0,0.9577546914418539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,fp8,0,1.1889546712239583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,float16,0,1.0322399934132893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,fp8,0,1.0487360159556072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,0,1,fp8,fp8,0,0.9621973037719727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,float16,0,1.0445813337961833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,fp8,0,1.0501920382181804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,fp8,0,0.6319040060043335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,0,1,fp8,fp8,0,0.5865653355916342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,float16,0,0.5460319916407267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,fp8,0,0.5457493464152018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,0,1,fp8,fp8,0,0.5082133213678995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,float16,0,0.5464746554692587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,fp8,0,0.552565336227417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,0,1,fp8,fp8,0,0.509114662806193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,float16,0,0.551909327507019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,fp8,0,0.551637331644694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,0,1,fp8,fp8,0,0.5118453502655029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,float16,0,0.5528053442637125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,fp8,0,0.5563466548919678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,0,1,fp8,fp8,0,0.5157279968261719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,float16,0,0.33536001046498615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,fp8,0,0.3440320094426473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,0,1,fp8,fp8,0,0.3219306667645772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,float16,0,0.3020906647046407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,fp8,0,0.3028480013211568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,0,1,fp8,fp8,0,0.2833706736564636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,float16,0,0.30188266436258954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,fp8,0,0.3038346568743388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,0,1,fp8,fp8,0,0.28298133611679077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,float16,0,0.3020799954732259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,fp8,0,0.3048906723658244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,0,1,fp8,fp8,0,0.2833919922510783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,float16,0,0.3066506584485372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,fp8,0,0.30620266993840534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,float16,0,0.616864005724589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,float16,0,0.19673067331314087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,fp8,0,0.204693337281545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,0,1,fp8,fp8,0,0.18927999337514242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,float16,0,0.1722559928894043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,fp8,0,0.17267733812332153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,0,1,fp8,fp8,0,0.1665066679318746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,float16,0,0.17270400126775107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,fp8,0,0.17536000410715738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,0,1,fp8,fp8,0,0.1669493317604065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,float16,0,0.17571733395258585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,fp8,0,0.17358400424321493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,0,1,fp8,fp8,0,0.16883200407028198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,float16,0,0.17639466126759848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,fp8,0,0.1779413421948751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,0,1,fp8,fp8,0,0.1707893411318461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,float16,0,0.1251413325468699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,fp8,0,0.12635200222333273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,0,1,fp8,fp8,0,0.1236799955368042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,float16,0,0.11474666992823283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,fp8,0,0.11541333794593811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,0,1,fp8,fp8,0,0.11122133334477742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,float16,0,0.11529599626859029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,fp8,0,0.1146506667137146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,0,1,fp8,fp8,0,0.1113866666952769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,float16,0,0.11414933204650879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,fp8,0,0.11478933691978455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,float16,0,0.11550933122634888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,0,1,fp8,fp8,0,0.2877066731452942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,fp8,0,0.11575466394424438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,0,1,fp8,fp8,0,0.11149332920710246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,0,1,fp8,fp8,0,0.11124799648920695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,0,1,fp8,fp8,0,0.9706346988677979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,float16,0,4.2366132736206055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,fp8,0,4.192639986673991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,0,1,fp8,fp8,0,4.117055892944336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,float16,0,4.353466669718425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,fp8,0,4.3521121342976885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,0,1,fp8,fp8,0,4.329840024312337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,float16,0,4.347023963928223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,fp8,0,4.405402819315593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,0,1,fp8,fp8,0,4.426725387573242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,float16,0,4.445066769917806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,fp8,0,4.393205324808757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,float16,0,2.5872373580932617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,0,1,fp8,fp8,0,4.896874745686849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,fp8,0,2.520688056945801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,0,1,fp8,fp8,0,2.5669333140055337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,float16,0,2.1114452679951987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,fp8,0,2.0865599314371743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,0,1,fp8,fp8,0,2.0723093350728354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,float16,0,2.139893372853597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,fp8,0,2.1272106170654297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,float16,0,2.1439733505249023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,fp8,0,2.164954662322998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,0,1,fp8,fp8,0,2.1599626541137695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,float16,0,2.207845369974772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,0,1,fp8,fp8,0,2.09549872080485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,float16,0,1.253941297531128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,fp8,0,1.2573813597361247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,0,1,fp8,fp8,0,1.2787199815114338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,fp8,0,2.2060426076253257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,fp8,0,1.0652320384979248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,0,1,fp8,fp8,0,1.0477066834767659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,0,1,fp8,fp8,0,2.4273120562235513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,float16,0,1.0748213132222493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,fp8,0,1.0858773390452068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,float16,0,1.0619466304779053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,float16,0,1.0865973631540935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,fp8,0,1.0938613414764404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,0,1,fp8,fp8,0,1.063370704650879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,float16,0,1.0976800123850505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,fp8,0,1.1047360102335613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,0,1,fp8,fp8,0,1.2084799607594807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,float16,0,0.6462453206380209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,0,1,fp8,fp8,0,1.0624000231424968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,0,1,fp8,fp8,0,0.6450933218002319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,float16,0,0.5477173328399658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,fp8,0,0.5494079987208048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,0,1,fp8,fp8,0,0.5360906521479288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,float16,0,0.5520586570103964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,fp8,0,0.5569760004679362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,0,1,fp8,fp8,0,0.5422933499018351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,float16,0,0.5553226470947266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,fp8,0,0.5598239898681641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,0,1,fp8,fp8,0,0.5467360019683838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,float16,0,0.5636853377024332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,fp8,0,0.6366453170776367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,float16,0,0.33717334270477295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,fp8,0,0.3381599982579549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,0,1,fp8,fp8,0,0.33461864789326984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,float16,0,0.2895466685295105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,fp8,0,0.2874506711959839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,0,1,fp8,fp8,0,0.2793440024058024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,float16,0,0.28933332363764447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,fp8,0,0.2934346596399943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,0,1,fp8,fp8,0,0.2813173333803813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,float16,0,0.2954453428586324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,fp8,0,0.2922666668891907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,0,1,fp8,fp8,0,0.2895413239796956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,float16,0,0.29689600070317584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,fp8,0,0.29947733879089355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,0,1,fp8,fp8,0,0.2908906737963359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,float16,0,0.18571199973424277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,fp8,0,0.18231467405954996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,0,1,fp8,fp8,0,0.18280533949534097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,float16,0,0.15613333384195963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,fp8,0,0.15707199772198996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,0,1,fp8,fp8,0,0.1497760017712911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,float16,0,0.1590079963207245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,fp8,0,0.15944000085194907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,0,1,fp8,fp8,0,0.15285866459210715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,fp8,0,0.5626879930496216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,fp8,0,0.16123732924461365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,0,1,fp8,fp8,0,0.15756799777348837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,float16,0,0.1629813313484192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,fp8,0,0.16209066907564798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,0,1,fp8,fp8,0,0.1581013302008311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,float16,0,0.10578667124112447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,fp8,0,0.10441066821416219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,0,1,fp8,fp8,0,0.10319999853769939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,float16,0,0.08851200342178345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,fp8,0,0.08695466319719951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,0,1,fp8,fp8,0,0.08473599950472514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,float16,0,0.08888000249862671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,fp8,0,0.08902933200200398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,0,1,fp8,fp8,0,0.08547199765841167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,float16,0,0.09086400270462036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,float16,0,0.15988266468048096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,0,1,fp8,fp8,0,0.5815360148747762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,float16,0,0.09187199672063191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,fp8,0,0.09114666779836018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,float16,0,0.060378665725390114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,fp8,0,0.06027733286221822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,0,1,fp8,fp8,0,0.062368000547091164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,float16,0,0.056133334835370384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,fp8,0,0.058304001887639366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,0,1,fp8,fp8,0,0.055685331424077354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,fp8,0,0.08786666393280029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,0,1,fp8,fp8,0,0.08594133456548055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,fp8,0,0.05701333284378052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,0,1,fp8,fp8,0,0.055013333757718406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,0,1,fp8,fp8,0,0.0888426701227824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,fp8,0,0.057477335135142006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,0,1,fp8,fp8,0,0.05472533404827118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,float16,0,0.05832533538341522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,fp8,0,0.058746665716171265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,0,1,fp8,fp8,0,0.05609600245952606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,float16,0,0.05648000041643778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,float16,0,0.056218668818473816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,0,1,float16,float16,0,3.5170186360677085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,0,1,float16,fp8,0,3.493706703186035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,0,1,fp8,fp8,0,3.5163466135660806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,0,1,float16,float16,0,3.6512905756632485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,0,1,float16,fp8,0,3.5935999552408853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,0,1,float16,float16,0,3.6514453887939453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,0,1,float16,fp8,0,3.7005812327067056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,0,1,fp8,fp8,0,3.713904062906901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,0,1,float16,float16,0,3.751962661743164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,0,1,fp8,fp8,0,3.7976481119791665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,0,1,fp8,fp8,0,4.284336090087891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,0,1,float16,fp8,0,2.169333299001058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,0,1,float16,fp8,0,3.748058636983236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,0,1,fp8,fp8,0,2.266122659047445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,0,1,float16,float16,0,1.7631680170694988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,0,1,float16,fp8,0,1.769850730895996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,0,1,fp8,fp8,0,1.768821398417155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,0,1,float16,float16,0,2.2497599919637046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,0,1,float16,fp8,0,1.795738697052002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,0,1,fp8,fp8,0,1.8455732663472493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,0,1,float16,float16,0,1.8061973253885906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,0,1,float16,fp8,0,1.815765380859375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,0,1,float16,float16,0,1.7992159525553386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,0,1,float16,float16,0,1.855679988861084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,0,1,float16,fp8,0,1.8593066533406575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,0,1,float16,float16,0,1.0879466533660889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,0,1,float16,fp8,0,1.0899999936421711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,0,1,fp8,fp8,0,1.8076799710591633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,0,1,fp8,fp8,0,1.1161812941233318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,0,1,float16,float16,0,0.8989066282908121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,0,1,float16,fp8,0,0.8969386418660482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,0,1,fp8,fp8,0,0.8963893254597982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,0,1,float16,float16,0,0.9137492974599203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,0,1,fp8,fp8,0,2.138794740041097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,0,1,float16,fp8,0,0.908128023147583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,0,1,fp8,fp8,0,0.923642635345459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,0,1,float16,float16,0,0.920805295308431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,0,1,float16,fp8,0,0.9168533484141032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,0,1,float16,float16,0,0.9275306860605875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,0,1,float16,fp8,0,0.928938627243042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,0,1,float16,float16,0,0.5569119850794474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,0,1,float16,fp8,0,0.545146663983663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,0,1,fp8,fp8,0,0.5675413211186727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,0,1,float16,float16,0,0.46116801102956134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,0,1,float16,fp8,0,0.4614666700363159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,0,1,fp8,fp8,0,0.45695467789967853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,0,1,fp8,fp8,0,0.9242613315582275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,0,1,float16,float16,0,0.466869314511617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,0,1,float16,fp8,0,0.4705546696980794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,0,1,fp8,fp8,0,0.46619733174641925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,0,1,float16,float16,0,0.46804265181223553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,0,1,float16,fp8,0,0.47113064924875897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,0,1,fp8,fp8,0,0.4697279930114746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,0,1,float16,float16,0,0.4774186611175537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,0,1,float16,fp8,0,0.4782400131225586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,0,1,float16,float16,0,0.2907680074373881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,0,1,float16,fp8,0,0.2841386596361796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,0,1,fp8,fp8,0,0.29520533482233685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,0,1,float16,float16,0,0.24175467093785605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,0,1,float16,fp8,0,0.24343466758728027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,0,1,fp8,fp8,0,0.23672000567118326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,0,1,float16,float16,0,0.24411199490229288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,0,1,float16,fp8,0,0.24757333596547446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,0,1,fp8,fp8,0,0.24381333589553833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,0,1,float16,float16,0,0.25005332628885907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,0,1,float16,fp8,0,0.2454506754875183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,0,1,fp8,fp8,0,0.24763200680414835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,0,1,float16,float16,0,0.25066665808359784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,0,1,float16,fp8,0,0.25091733535130817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,0,1,fp8,fp8,0,0.25089067220687866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,0,1,float16,fp8,0,0.1548799971739451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,0,1,fp8,fp8,0,0.16115732987721762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,0,1,float16,float16,0,0.13055466612180075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,0,1,float16,fp8,0,0.13197867075602213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,0,1,fp8,fp8,0,1.054645299911499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,0,1,float16,float16,0,0.1328266660372416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,0,1,float16,fp8,0,0.13301333785057068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,0,1,fp8,fp8,0,0.4984960158665975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,0,1,fp8,fp8,0,0.13014400005340576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,0,1,float16,float16,0,0.1572106679280599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,0,1,float16,fp8,0,0.13247999548912048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,0,1,fp8,fp8,0,0.13149333000183105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,0,1,float16,float16,0,0.1365173359711965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,0,1,float16,fp8,0,0.13546666502952576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,0,1,fp8,fp8,0,0.13506133357683817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,0,1,float16,float16,0,0.08923733234405518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,0,1,float16,fp8,0,0.08822932839393616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,0,1,fp8,fp8,0,0.09181867043177287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,0,1,float16,float16,0,0.07358400026957194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,0,1,float16,fp8,0,0.07415466507275899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,0,1,fp8,fp8,0,0.0703413337469101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,0,1,float16,float16,0,0.07378133138020833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,0,1,float16,fp8,0,0.0735999991496404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,0,1,fp8,fp8,0,0.07170133292675018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,0,1,float16,float16,0,0.0747680018345515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,0,1,float16,float16,0,0.13341866930325827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,0,1,fp8,fp8,0,0.07083733379840851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,0,1,float16,float16,0,0.07504533231258392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,0,1,float16,fp8,0,0.07649066547552745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,0,1,fp8,fp8,0,0.07538666824499766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,0,1,float16,float16,0,0.05243733525276184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,0,1,float16,fp8,0,0.05194666484991709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,0,1,fp8,fp8,0,0.05499200026194254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,0,1,float16,float16,0,0.04862933357556661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,0,1,float16,fp8,0,0.07567466795444489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,0,1,fp8,fp8,0,0.047839999198913574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,0,1,float16,float16,0,0.050741334756215416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,0,1,float16,fp8,0,0.04804799954096476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,0,1,fp8,fp8,0,0.04684799909591675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,0,1,float16,float16,0,0.04841066896915436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,0,1,float16,fp8,0,0.04985066751639048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,0,1,fp8,fp8,0,0.046351999044418335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,0,1,float16,float16,0,0.04814933240413666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,0,1,fp8,fp8,0,0.047007997830708824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,0,1,float16,fp8,0,0.04910933474699656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,0,1,float16,float16,0,0.03326933334271113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,0,1,float16,fp8,0,0.03405333310365677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,0,1,fp8,fp8,0,0.03390933324893316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,0,1,float16,float16,0,0.03158933420976003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,0,1,float16,fp8,0,0.03177600105603536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,0,1,float16,float16,0,0.03143999973932902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,0,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,0,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,0,1,float16,float16,0,0.03153600047032038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,0,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,0,1,fp8,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,0,1,float16,float16,0,0.03204799940188726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,0,1,float16,fp8,0,0.03264000018437704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,0,1,fp8,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,0,1,float16,fp8,0,0.04885333279768626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,0,1,float16,float16,0,1.5913119316101074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,0,1,fp8,fp8,0,0.12837333480517069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,0,1,float16,float16,0,1.6161920229593914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,0,1,float16,fp8,0,1.6031146049499512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,0,1,float16,fp8,0,1.589903990427653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,0,1,fp8,fp8,0,1.6006986300150554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,0,1,fp8,fp8,0,1.6825599670410156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,0,1,float16,float16,0,1.624293327331543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,0,1,float16,fp8,0,1.634874661763509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,0,1,fp8,fp8,0,1.713370641072591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,0,1,float16,float16,0,1.6852693557739258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,0,1,float16,fp8,0,1.662549336751302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,0,1,float16,float16,0,0.9975093205769857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,0,1,float16,fp8,0,0.9822026888529459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,0,1,fp8,fp8,0,1.0411146481831868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,0,1,float16,float16,0,0.8069866498311361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,0,1,float16,fp8,0,0.8072960376739502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,0,1,fp8,fp8,0,0.8143520355224609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,0,1,float16,float16,0,0.814575990041097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,0,1,float16,fp8,0,0.8157653013865153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,0,1,fp8,fp8,0,0.8306559721628824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,0,1,float16,float16,0,0.8189706802368164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,0,1,float16,fp8,0,0.8204639752705892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,0,1,fp8,fp8,0,0.8350773652394613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,0,1,float16,float16,0,0.8354293505350748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,0,1,float16,fp8,0,0.833184003829956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,0,1,float16,float16,0,0.5089333454767863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,0,1,float16,fp8,0,0.5011573235193888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,0,1,fp8,fp8,0,0.5261866648991903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,0,1,fp8,fp8,0,1.9689706166585286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,0,1,float16,fp8,0,0.4169386625289917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,0,1,fp8,fp8,0,0.41502400239308673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,0,1,float16,float16,0,0.42046932379404706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,0,1,float16,fp8,0,0.42345066865285236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,0,1,fp8,fp8,0,0.42213332653045654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,0,1,fp8,fp8,0,0.9678186575571696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,0,1,float16,float16,0,0.42400534947713214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,0,1,float16,fp8,0,0.4214186668395996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,0,1,fp8,fp8,0,0.42725332578023273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,0,1,float16,float16,0,0.4340906540552775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,0,1,float16,fp8,0,0.42827733357747394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,0,1,float16,float16,0,0.2699626684188843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,0,1,float16,fp8,0,0.26038400332132977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,0,1,fp8,fp8,0,0.2751893401145935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,0,1,float16,float16,0,0.21911466121673584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,0,1,float16,fp8,0,0.21897067626317343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,0,1,fp8,fp8,0,0.21595199902852377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,0,1,float16,float16,0,0.22164267301559448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,0,1,float16,fp8,0,0.2199839949607849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,0,1,fp8,fp8,0,0.22213866313298544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,0,1,float16,float16,0,0.22117332617441812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,0,1,float16,fp8,0,0.22387200593948364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,0,1,fp8,fp8,0,0.22364266713460287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,0,1,float16,float16,0,0.2294506629308065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,0,1,float16,fp8,0,0.22562666734059653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,0,1,fp8,fp8,0,0.2305013338724772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,0,1,float16,fp8,0,0.14306666453679404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,0,1,fp8,fp8,0,0.14924800395965576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,0,1,float16,float16,0,0.11814933021863301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,0,1,float16,fp8,0,0.11892267068227132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,0,1,fp8,fp8,0,0.11617599924405415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,0,1,float16,float16,0,0.4155999819437663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,0,1,fp8,fp8,0,0.4506506522496541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,0,1,float16,float16,0,0.14622933665911356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,0,1,fp8,fp8,0,0.11783466736475627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,0,1,float16,float16,0,0.12052266796429952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,0,1,float16,fp8,0,0.12009599804878235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,0,1,fp8,fp8,0,0.11817600329717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,0,1,float16,float16,0,0.12330666184425354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,0,1,float16,float16,0,0.11904000242551167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,0,1,float16,fp8,0,0.12310399611790974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,0,1,fp8,fp8,0,0.1225386659304301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,0,1,float16,fp8,0,0.08201600114504497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,0,1,float16,fp8,0,0.11872000495592754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,0,1,fp8,fp8,0,0.08509332935015361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,0,1,float16,float16,0,0.06648533542950948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,0,1,float16,fp8,0,0.0673173318306605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,0,1,fp8,fp8,0,0.06418666740258534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,0,1,float16,float16,0,0.06825066606203715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,0,1,float16,fp8,0,0.06634133557478587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,0,1,fp8,fp8,0,0.0663679987192154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,0,1,float16,float16,0,0.0662666658560435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,0,1,float16,fp8,0,0.06798933446407318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,0,1,fp8,fp8,0,0.06594133377075195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,0,1,float16,float16,0,0.06880533198515575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,0,1,float16,fp8,0,0.06837333242098491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,0,1,fp8,fp8,0,0.07011199990908305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,0,1,float16,float16,0,0.04667200148105621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,0,1,float16,fp8,0,0.04622933268547058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,0,1,float16,float16,0,0.04215999941031138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,0,1,float16,fp8,0,0.04364266494909922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,0,1,fp8,fp8,0,0.040421334405740104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,0,1,float16,float16,0,0.043231998880704246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,0,1,float16,fp8,0,0.04159466673930486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,0,1,fp8,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,0,1,float16,float16,0,0.0422986646493276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,0,1,float16,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,0,1,fp8,fp8,0,0.042090664307276406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,0,1,float16,float16,0,0.0422026664018631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,0,1,float16,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,0,1,fp8,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,0,1,float16,float16,0,0.029370665550231934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,0,1,float16,fp8,0,0.03183466692765554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,0,1,fp8,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,0,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,0,1,fp8,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,0,1,float16,float16,0,0.029418667157491047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,0,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,0,1,fp8,fp8,0,0.028602667152881622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,0,1,float16,float16,0,0.027765333652496338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,0,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,0,1,fp8,fp8,0,0.02807466685771942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,0,1,float16,float16,0,0.027552001178264618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,0,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,0,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,0,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,0,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,0,1,float16,float16,0,0.023317334552605946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,0,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,0,1,float16,fp8,0,0.02473066747188568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,0,1,fp8,fp8,0,0.05049066742261251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,0,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,0,1,fp8,fp8,0,0.023941333095232647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,0,1,float16,float16,0,0.88482133547465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,0,1,float16,fp8,0,0.8811519940694174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,0,1,fp8,fp8,0,0.8849919637044271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,0,1,float16,float16,0,0.08299200236797333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,0,1,float16,fp8,0,0.8979626496632894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,0,1,fp8,fp8,0,0.8934079806009928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,0,1,float16,float16,0,0.9016799926757812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,0,1,float16,fp8,0,0.9052639802296957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,0,1,fp8,fp8,0,0.9143839677174886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,0,1,float16,float16,0,0.9239839712778727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,0,1,float16,float16,0,0.8952266375223795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,0,1,float16,fp8,0,0.914522647857666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,0,1,float16,float16,0,0.5438186724980673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,0,1,fp8,fp8,0,1.0388853549957275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,0,1,float16,fp8,0,0.5440906683603922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,0,1,fp8,fp8,0,0.5612800121307373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,0,1,float16,fp8,0,0.4496266841888428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,0,1,fp8,fp8,0,0.44779733816782635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,0,1,float16,float16,0,0.4586613178253174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,0,1,float16,fp8,0,0.45583999156951904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,0,1,fp8,fp8,0,0.45537598927815753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,0,1,float16,float16,0,0.4599786599477132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,0,1,float16,fp8,0,0.459935983022054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,0,1,fp8,fp8,0,0.45876801013946533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,0,1,float16,float16,0,0.46969600518544513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,0,1,float16,fp8,0,0.4682453473409017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,0,1,float16,float16,0,0.28332799673080444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,0,1,float16,float16,0,0.45205867290496826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,0,1,fp8,fp8,0,0.2876746654510498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,0,1,float16,float16,0,0.2352693279584249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,0,1,float16,fp8,0,0.23348265886306763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,0,1,fp8,fp8,0,0.2321760058403015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,0,1,float16,float16,0,0.23770666122436523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,0,1,float16,fp8,0,0.23731732368469238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,0,1,fp8,fp8,0,0.4957546790440877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,0,1,fp8,fp8,0,0.23588800430297852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,0,1,float16,float16,0,0.23916266361872354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,0,1,float16,fp8,0,0.24004799127578735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,0,1,fp8,fp8,0,0.2404693365097046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,0,1,float16,float16,0,0.24893865982691446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,0,1,float16,fp8,0,0.24367467562357584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,0,1,fp8,fp8,0,0.24398932854334512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,0,1,float16,float16,0,0.15214932958285013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,0,1,float16,fp8,0,0.14991999665896097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,0,1,fp8,fp8,0,0.15616533160209656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,0,1,float16,fp8,0,0.12746133406956991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,0,1,float16,fp8,0,0.27504533529281616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,0,1,fp8,fp8,0,0.12982400258382162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,0,1,float16,float16,0,0.1279039978981018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,0,1,fp8,fp8,0,0.12522666652997336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,0,1,float16,float16,0,0.12897066275278726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,0,1,float16,fp8,0,0.129013329744339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,0,1,fp8,fp8,0,0.12690666317939758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,0,1,float16,float16,0,0.13144000371297201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,0,1,float16,float16,0,0.12541866302490234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,0,1,float16,float16,0,0.08281599978605907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,0,1,float16,fp8,0,0.08126399914423625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,0,1,fp8,fp8,0,0.08623466889063518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,0,1,float16,float16,0,0.06956266860167186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,0,1,float16,fp8,0,0.06916266679763794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,0,1,fp8,fp8,0,0.06645866731802623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,0,1,float16,float16,0,0.06997333467006683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,0,1,float16,fp8,0,0.0684853345155716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,0,1,fp8,fp8,0,0.06600533425807953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,0,1,float16,float16,0,0.07047466437021892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,0,1,float16,fp8,0,0.07020266850789388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,0,1,fp8,fp8,0,0.06659733255704244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,0,1,float16,float16,0,0.07085333267847697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,0,1,float16,fp8,0,0.07030400137106578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,0,1,fp8,fp8,0,0.0695306658744812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,0,1,float16,fp8,0,0.13038933277130127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,0,1,float16,float16,0,0.04897066454092661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,0,1,float16,fp8,0,0.048021331429481506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,0,1,fp8,fp8,0,0.05085866649945577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,0,1,float16,float16,0,0.04542933404445648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,0,1,float16,fp8,0,0.04470933477083842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,0,1,float16,float16,0,0.0450133333603541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,0,1,float16,fp8,0,0.1265120009581248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,0,1,float16,fp8,0,0.043807998299598694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,0,1,fp8,fp8,0,0.04167999823888143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,0,1,float16,fp8,0,0.04388799766699473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,0,1,fp8,fp8,0,0.04229333500067393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,0,1,float16,float16,0,0.04558933277924856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,0,1,float16,fp8,0,0.04576000074545542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,0,1,fp8,fp8,0,0.13013333082199097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,0,1,fp8,fp8,0,0.04409599800904592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,0,1,float16,float16,0,0.0324799989660581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,0,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,0,1,fp8,fp8,0,0.03230399886767069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,0,1,float16,fp8,0,0.02977599948644638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,0,1,fp8,fp8,0,0.02996266633272171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,0,1,float16,float16,0,0.029951999584833782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,0,1,float16,fp8,0,0.030224000414212544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,0,1,fp8,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,0,1,float16,float16,0,0.02980799973011017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,0,1,float16,fp8,0,0.03048533449570338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,0,1,fp8,fp8,0,0.02979733298222224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,0,1,float16,float16,0,0.029098667204380035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,0,1,float16,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,0,1,fp8,fp8,0,0.029781334102153778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,0,1,float16,float16,0,0.021568000316619873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,0,1,fp8,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,0,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,0,1,float16,fp8,0,0.020848001043001812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,0,1,fp8,fp8,0,0.021669333179791767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,0,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,0,1,fp8,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,0,1,float16,float16,0,0.021221332252025604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,0,1,float16,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,0,1,float16,float16,0,0.021344001094500225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,0,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,0,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,0,1,fp8,fp8,0,0.04393066465854645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,0,1,float16,float16,0,0.04400533437728882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,0,1,float16,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,0,1,float16,float16,0,0.01747200017174085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,0,1,float16,fp8,0,0.5804640054702759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,0,1,fp8,fp8,0,0.5854933261871338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,0,1,float16,float16,0,0.5878080129623413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,0,1,float16,fp8,0,0.6043786605199178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,0,1,fp8,fp8,0,0.5929813385009766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,0,1,float16,float16,0,0.5882720152537028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,0,1,float16,fp8,0,0.6171146631240845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,0,1,fp8,fp8,0,0.5957653522491455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,0,1,float16,float16,0,0.6010186672210693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,0,1,float16,fp8,0,0.5954026778539022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,0,1,fp8,fp8,0,0.6069973309834799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,0,1,float16,fp8,0,0.3394186496734619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,0,1,fp8,fp8,0,0.355130672454834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,0,1,float16,float16,0,0.5831253528594971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,0,1,float16,fp8,0,0.2993813355763753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,0,1,fp8,fp8,0,0.30008000135421753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,0,1,float16,float16,0,0.3036266764005025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,0,1,float16,fp8,0,0.3039039969444275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,0,1,fp8,fp8,0,0.3067786693572998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,0,1,float16,float16,0,0.30241066217422485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,0,1,float16,float16,0,0.3445653518040975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,0,1,fp8,fp8,0,0.30449066559473675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,0,1,float16,float16,0,0.3083626627922058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,0,1,float16,float16,0,0.300709327061971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,0,1,fp8,fp8,0,0.3087306618690491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,0,1,float16,float16,0,0.18362132708231607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,0,1,float16,fp8,0,0.1792373259862264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,0,1,fp8,fp8,0,0.18701332807540894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,0,1,float16,float16,0,0.15919466813405356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,0,1,float16,fp8,0,0.15768532951672873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,0,1,fp8,fp8,0,0.16408000389734903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,0,1,float16,float16,0,0.1586186687151591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,0,1,float16,fp8,0,0.30272533496220905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,0,1,fp8,fp8,0,0.15803733468055725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,0,1,float16,float16,0,0.15960533420244852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,0,1,float16,fp8,0,0.15974400440851846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,0,1,fp8,fp8,0,0.1596213380495707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,0,1,float16,float16,0,0.1645813286304474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,0,1,float16,fp8,0,0.16129600008328757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,0,1,fp8,fp8,0,0.1618880033493042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,0,1,float16,fp8,0,0.09741866588592529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,0,1,fp8,fp8,0,0.10337066650390625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,0,1,float16,float16,0,0.08706133564313252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,0,1,float16,fp8,0,0.15892799695332846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,0,1,float16,fp8,0,0.08717333277066548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,0,1,fp8,fp8,0,0.08378666639328003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,0,1,float16,float16,0,0.0872213343779246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,0,1,float16,fp8,0,0.08691199620564778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,0,1,fp8,fp8,0,0.08275199929873149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,0,1,float16,float16,0,0.08715200424194336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,0,1,float16,fp8,0,0.08706133564313252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,0,1,fp8,fp8,0,0.0832426647345225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,0,1,float16,float16,0,0.08906132976214091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,0,1,float16,fp8,0,0.08915733297665913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,0,1,fp8,fp8,0,0.086709330479304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,0,1,float16,float16,0,0.05620799958705902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,0,1,float16,fp8,0,0.05599466462930044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,0,1,fp8,fp8,0,0.05934933324654897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,0,1,float16,float16,0,0.05213333169619242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,0,1,float16,fp8,0,0.05227200190226237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,0,1,fp8,fp8,0,0.04980266590913137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,0,1,float16,float16,0,0.051674668987592064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,0,1,float16,fp8,0,0.05212800204753876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,0,1,fp8,fp8,0,0.04977599779764811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,0,1,float16,float16,0,0.051039998730023704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,0,1,float16,fp8,0,0.3073439995447795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,0,1,fp8,fp8,0,0.049914668003718056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,0,1,float16,float16,0,0.051914667089780174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,0,1,float16,fp8,0,0.0521919975678126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,0,1,fp8,fp8,0,0.05125333368778229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,0,1,float16,float16,0,0.036133334040641785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,0,1,float16,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,0,1,fp8,fp8,0,0.035887998839219414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,0,1,float16,float16,0,0.03388266762097677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,0,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,0,1,fp8,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,0,1,float16,float16,0,0.03370666752258936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,0,1,float16,fp8,0,0.03349866718053818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,0,1,fp8,fp8,0,0.033733333150545754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,0,1,float16,float16,0,0.03382399926582972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,0,1,float16,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,0,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,0,1,float16,float16,0,0.03536533315976461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,0,1,float16,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,0,1,fp8,fp8,0,0.033957332372665405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,0,1,fp8,fp8,0,0.023893333971500397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,0,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,0,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,0,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,0,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,0,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,0,1,float16,float16,0,0.02342933416366577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,0,1,float16,fp8,0,0.023541333774725597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,0,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,0,1,fp8,fp8,0,0.023567999402681988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,0,1,float16,fp8,0,0.05188799897829691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,0,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,0,1,float16,float16,0,0.01815466706951459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,0,1,float16,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,0,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,0,1,float16,float16,0,0.01674666628241539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,0,1,float16,float16,0,0.10021332899729411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,0,1,float16,float16,0,0.43055466810862225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,0,1,float16,fp8,0,0.4285600185394287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,0,1,fp8,fp8,0,0.43800532817840576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,0,1,float16,float16,0,0.4323999881744385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,0,1,float16,fp8,0,0.44257601102193195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,0,1,float16,fp8,0,0.017722666263580322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,0,1,fp8,fp8,0,0.4503466685612996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,0,1,float16,float16,0,0.43109333515167236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,0,1,float16,fp8,0,0.43982934951782227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,0,1,fp8,fp8,0,0.4400533437728882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,0,1,float16,fp8,0,0.43613334496816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,0,1,fp8,fp8,0,0.44574932257334393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,0,1,float16,float16,0,0.24687999486923218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,0,1,fp8,fp8,0,0.2553279995918274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,0,1,float16,float16,0,0.22483199834823608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,0,1,float16,fp8,0,0.22615466515223184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,0,1,fp8,fp8,0,0.2259733279546102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,0,1,float16,float16,0,0.22426666816075644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,0,1,float16,fp8,0,0.2246560057004293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,0,1,fp8,fp8,0,0.22643200556437174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,0,1,float16,float16,0,0.22538133462270102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,0,1,float16,fp8,0,0.2243679960568746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,0,1,fp8,fp8,0,0.22683199246724448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,0,1,float16,fp8,0,0.24336000283559164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,0,1,float16,float16,0,0.22497600317001343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,0,1,float16,fp8,0,0.2265440026919047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,0,1,fp8,fp8,0,0.23197867472966513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,0,1,float16,fp8,0,0.1286133329073588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,0,1,fp8,fp8,0,0.1368053356806437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,0,1,float16,float16,0,0.11955199639002483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,0,1,float16,fp8,0,0.12017066280047099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,0,1,fp8,fp8,0,0.11774933338165283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,0,1,float16,float16,0,0.11973866820335388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,0,1,float16,fp8,0,0.12006400028864543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,0,1,fp8,fp8,0,0.11585066715876262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,0,1,float16,float16,0,0.1204906702041626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,0,1,float16,fp8,0,0.1197653313477834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,0,1,fp8,fp8,0,0.11788266897201538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,0,1,float16,float16,0,0.12160000205039978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,0,1,float16,fp8,0,0.12166399757067363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,0,1,fp8,fp8,0,0.12154133121172588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,0,1,float16,float16,0,0.07264000177383423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,0,1,float16,fp8,0,0.07266133526961009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,0,1,fp8,fp8,0,0.07648533085982005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,0,1,float16,float16,0,0.06821866830190022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,0,1,float16,fp8,0,0.0684746652841568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,0,1,fp8,fp8,0,0.0663679987192154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,0,1,float16,float16,0,0.06865600248177846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,0,1,float16,fp8,0,0.06841599941253662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,0,1,fp8,fp8,0,0.06693333387374878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,0,1,float16,float16,0,0.06881066660086314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,0,1,float16,fp8,0,0.06853866577148438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,0,1,fp8,fp8,0,0.06817600131034851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,0,1,float16,float16,0,0.06844800213972728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,0,1,float16,fp8,0,0.06888000170389812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,0,1,fp8,fp8,0,0.0685280015071233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,0,1,float16,float16,0,0.04408533374468485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,0,1,float16,float16,0,0.13024533788363138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,0,1,fp8,fp8,0,0.04332800209522247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,0,1,float16,float16,0,0.041562666495641075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,0,1,float16,fp8,0,0.041573333243529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,0,1,fp8,fp8,0,0.04050666590531667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,0,1,float16,float16,0,0.04182933270931244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,0,1,float16,fp8,0,0.0417546679576238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,0,1,fp8,fp8,0,0.0401706670721372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,0,1,float16,float16,0,0.04221866528193156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,0,1,float16,fp8,0,0.041877334316571556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,0,1,fp8,fp8,0,0.041589332123597465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,0,1,float16,float16,0,0.041834667325019836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,0,1,float16,fp8,0,0.04170133173465729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,0,1,fp8,fp8,0,0.04158399999141693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,0,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,0,1,fp8,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,0,1,float16,float16,0,0.0272533322374026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,0,1,float16,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,0,1,fp8,fp8,0,0.02940800040960312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,0,1,float16,float16,0,0.43506133556365967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,0,1,float16,fp8,0,0.02777066578467687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,0,1,fp8,fp8,0,0.027589333554108936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,0,1,float16,float16,0,0.029477333029111225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,0,1,float16,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,0,1,fp8,fp8,0,0.027749332288901012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,0,1,float16,float16,0,0.028005334238211315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,0,1,float16,fp8,0,0.029120000700155895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,0,1,fp8,fp8,0,0.027850667635599773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,0,1,float16,float16,0,0.021749332547187805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,0,1,float16,float16,0,0.02941333254178365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,0,1,fp8,fp8,0,0.021925332645575207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,0,1,float16,float16,0,0.019466667125622433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,0,1,float16,fp8,0,0.020560000091791153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,0,1,fp8,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,0,1,float16,float16,0,0.019882666567961376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,0,1,float16,float16,0,0.022448000808556873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,0,1,float16,fp8,0,0.02080533280968666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,0,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,0,1,float16,fp8,0,0.021829334398110706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,0,1,float16,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,0,1,float16,float16,0,0.016127999871969223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,0,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,0,1,float16,float16,0,0.36281601587931317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,0,1,float16,fp8,0,0.36349332332611084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,0,1,float16,fp8,0,0.04387733340263367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,0,1,fp8,fp8,0,0.36769068241119385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,0,1,float16,float16,0,0.37249600887298584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,0,1,float16,fp8,0,0.36325331528981525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,0,1,fp8,fp8,0,0.3677866856257121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,0,1,float16,float16,0,0.3645013173421224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,0,1,float16,fp8,0,0.3636639912923177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,0,1,fp8,fp8,0,0.3678559859593709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,0,1,float16,float16,0,0.3646506468454997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,0,1,float16,fp8,0,0.36447465419769287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,0,1,float16,float16,0,0.2011573314666748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,0,1,float16,fp8,0,0.20035733779271445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,0,1,fp8,fp8,0,0.21385065714518228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,0,1,float16,float16,0,0.18888000647226968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,0,1,float16,fp8,0,0.18928533792495728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,0,1,fp8,fp8,0,0.18758932749430338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,0,1,float16,float16,0,0.18986133734385172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,0,1,float16,fp8,0,0.18986133734385172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,0,1,fp8,fp8,0,0.18845866123835245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,0,1,float16,float16,0,0.18941332896550497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,0,1,float16,fp8,0,0.19136534134546915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,0,1,fp8,fp8,0,0.18734399477640787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,0,1,float16,float16,0,0.191594660282135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,0,1,float16,fp8,0,0.1916159987449646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,0,1,fp8,fp8,0,0.19222400585810342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,0,1,float16,float16,0,0.1077280044555664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,0,1,float16,fp8,0,0.11179199814796448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,0,1,fp8,fp8,0,0.11187199751536052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,0,1,float16,float16,0,0.10381866494814555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,0,1,float16,fp8,0,0.10332266489664714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,0,1,fp8,fp8,0,0.10272533694903056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,0,1,float16,float16,0,0.10334400335947673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,0,1,float16,fp8,0,0.1034453312555949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,0,1,fp8,fp8,0,0.10212266445159912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,0,1,float16,float16,0,0.10295466581980388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,0,1,fp8,fp8,0,0.37088533242543537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,0,1,fp8,fp8,0,0.10143466790517171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,0,1,float16,float16,0,0.10322667161623637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,0,1,float16,fp8,0,0.10376532872517903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,0,1,fp8,fp8,0,0.10318932930628459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,0,1,float16,float16,0,0.062122667829195656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,0,1,fp8,fp8,0,0.06217599908510844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,0,1,float16,fp8,0,0.10237333178520203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,0,1,float16,fp8,0,0.05842666824658712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,0,1,fp8,fp8,0,0.05945600072542826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,0,1,float16,float16,0,0.06043200194835663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,0,1,float16,fp8,0,0.06027733286221822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,0,1,fp8,fp8,0,0.059952000776926674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,0,1,float16,float16,0,0.058650667468706764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,0,1,float16,fp8,0,0.06225066880385081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,0,1,float16,fp8,0,0.059861332178115845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,0,1,fp8,fp8,0,0.058245331048965454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,0,1,float16,fp8,0,0.06165333092212677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,0,1,float16,float16,0,0.06043200194835663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,0,1,float16,float16,0,0.03746666759252548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,0,1,float16,fp8,0,0.037802666425704956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,0,1,fp8,fp8,0,0.037461332976818085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,0,1,float16,float16,0,0.03777066618204117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,0,1,float16,fp8,0,0.035487999518712364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,0,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,0,1,float16,float16,0,0.03570666660865148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,0,1,float16,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,0,1,fp8,fp8,0,0.035455999275048576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,0,1,float16,float16,0,0.0358240008354187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,0,1,float16,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,0,1,fp8,fp8,0,0.03551999976237615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,0,1,float16,float16,0,0.03729599962631861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,0,1,float16,fp8,0,0.03763733307520548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,0,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,0,1,float16,float16,0,0.027295999228954315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,0,1,float16,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,0,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,0,1,float16,float16,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,0,1,fp8,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,0,1,float16,float16,0,0.025631998976071674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,0,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,0,1,float16,float16,0,0.025370667378107708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,0,1,float16,fp8,0,0.02646933247645696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,0,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,0,1,float16,float16,0,0.025546667476495106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,0,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,0,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,0,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,0,1,float16,float16,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,0,1,fp8,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,0,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,0,1,float16,fp8,0,0.015578666081031164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,0,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,0,1,float16,float16,0,0.06020799775918325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,0,0.30709866682688397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,128,0,1,fp8,fp8,0,0.2996906638145447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,0,0.3065279920895894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,0,0.30688534180323285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,2,128,0,1,fp8,fp8,0,0.3001280029614766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,0,0.30826133489608765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,0,1,fp8,fp8,0,0.05798399945100149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,0,0.3084106643994649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,4,128,0,1,fp8,fp8,0,0.2998986641565959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,0,0.30827200412750244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,0,0.3078400095303853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,8,128,0,1,fp8,fp8,0,0.2999093333880107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,0,0.1604746679464976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,0,0.30643733342488605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,0,0.16064533591270447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,0,0.16120533148447672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,0,0.16053332885106406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,0,0.16077867150306702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,0,0.16105600198109946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,2,128,0,1,fp8,fp8,0,0.15684266885121664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,0,0.1606613298257192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,0,0.1604693333307902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,64,128,0,1,fp8,fp8,0,0.15800533692042032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,0,0.16077333688735962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,0,0.16054399808247885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,8,128,0,1,fp8,fp8,0,0.15684800346692404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,1,128,0,1,fp8,fp8,0,0.1565013329188029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,0,0.0870293378829956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,128,0,1,fp8,fp8,0,0.08457600076993306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,0,0.08687466382980347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,0,0.08687999844551086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,1,128,0,1,fp8,fp8,0,0.08679999907811482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,0,0.08713066577911377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,0,0.08830400307973225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,2,128,0,1,fp8,fp8,0,0.08509332935015361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,0,0.08788266777992249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,0,0.08747733632723491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,4,128,0,1,fp8,fp8,0,0.08506666620572408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,0,0.08699733018875122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,0,0.0872213343779246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,8,128,0,1,fp8,fp8,0,0.08481066425641377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,0,0.052015999952952065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,128,0,1,fp8,fp8,0,0.049925332268079124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,0,0.05177066723505656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,0,0.05203199883302053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,1,128,0,1,fp8,fp8,0,0.04996799925963084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,0,0.05203199883302053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,64,4,128,0,1,fp8,fp8,0,0.156549334526062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,0,0.052111998200416565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,2,128,0,1,fp8,fp8,0,0.04975999891757965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,0,0.05199466645717621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,0,0.05183466772238413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,0,0.05197866757710775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,0,0.05190933247407278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,0,0.052111998200416565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,8,128,0,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,0,0.033471999069054924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,0,0.033557333052158356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,64,128,0,1,fp8,fp8,0,0.03172266731659571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,0,0.031557333966096245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,1,128,0,1,fp8,fp8,0,0.03170666595300039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,0,0.033514666060606636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,2,128,0,1,fp8,fp8,0,0.032058666149775185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,0,0.03342933456103007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,0,0.03386666625738144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,4,128,0,1,fp8,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,0,0.03345600018898646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,64,8,128,0,1,fp8,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,64,4,128,0,1,fp8,fp8,0,0.050000001986821495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,128,0,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,1,128,0,1,fp8,fp8,0,0.023818666736284893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,0,0.025637333591779072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,2,128,0,1,fp8,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,4,128,0,1,fp8,fp8,0,0.023584000766277313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,0,0.02372266600529353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,8,128,0,1,fp8,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,64,128,0,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,1,128,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,2,128,0,1,fp8,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,0,0.02235200007756551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,4,128,0,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,64,8,128,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,64,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,0,0.017685333887736004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,0,0.01869333287080129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,0,0.08674133817354839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,1,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,128,0,1,fp8,fp8,0,0.016538667182127636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,4,128,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,8,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,128,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,2,128,0,1,fp8,fp8,0,0.01651200031240781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,0,0.01581866666674614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,128,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,128,0,1,fp8,fp8,0,16.801605224609375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,float16,0,20.981792449951172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,fp8,0,21.511510213216145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,float16,0,22.913958231608074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,128,0,1,fp8,fp8,0,16.77684275309245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,fp8,0,23.47076924641927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,float16,0,22.638900756835938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,fp8,0,22.592938741048176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,float16,0,11.863290150960287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,128,0,1,fp8,fp8,0,16.93991978963216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,fp8,0,11.942459106445312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,128,0,1,fp8,fp8,0,9.01416015625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,128,0,1,fp8,fp8,0,8.502527872721354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,float16,0,11.614388783772787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,fp8,0,11.430720011393229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,float16,0,11.48849105834961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,128,0,1,fp8,fp8,0,8.619642893473307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,fp8,0,11.323354085286459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,float16,0,11.075477600097656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,fp8,0,11.858559926350912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,float16,0,6.240698496500651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,fp8,0,5.795157114664714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,128,0,1,fp8,fp8,0,8.759312311808268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,128,0,1,fp8,fp8,0,4.585253397623698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,float16,0,5.737354914347331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,fp8,0,5.341621398925781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,float16,0,5.55021858215332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,128,0,1,fp8,fp8,0,4.446165402730306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,128,0,1,fp8,fp8,0,4.421615918477376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,fp8,0,5.728426615397136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,float16,0,5.83351453145345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,float16,0,2.9520320892333984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,fp8,0,5.719930648803711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,128,0,1,fp8,fp8,0,4.432741483052571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,fp8,0,2.8768959045410156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,128,0,1,fp8,fp8,0,2.471552054087321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,float16,0,2.772128105163574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,fp8,0,2.686496098836263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,128,0,1,fp8,fp8,0,2.3889973958333335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,float16,0,2.7760372161865234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,fp8,0,2.8335307439168296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,float16,0,2.751311937967936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,fp8,0,2.7423839569091797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,128,0,1,fp8,fp8,0,2.398090680440267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,128,0,1,fp8,fp8,0,2.390117327372233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,128,0,1,fp8,fp8,0,10.102879842122396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,float16,0,13.277509053548178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,fp8,0,13.210447947184244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,float16,0,12.341626485188803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,128,0,1,fp8,fp8,0,10.198069254557291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,fp8,0,13.753706614176432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,float16,0,13.46060307820638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,fp8,0,13.185370127360025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,float16,0,7.360501607259114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,fp8,0,6.905263900756836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,128,0,1,fp8,fp8,0,5.310111999511719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,128,0,1,fp8,fp8,0,10.126933415730795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,float16,0,6.228426615397136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,fp8,0,6.65225092569987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,128,0,1,fp8,fp8,0,5.098869323730469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,float16,0,6.616122563680013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,fp8,0,6.406917572021484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,128,0,1,fp8,fp8,0,5.082847913106282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,float16,0,6.340037027994792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,float16,0,3.330890655517578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,fp8,0,6.389557520548503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,128,0,1,fp8,fp8,0,5.119087855021159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,fp8,0,3.2122186024983725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,128,0,1,fp8,fp8,0,2.8148746490478516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,float16,0,3.124575932820638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,fp8,0,3.1982345581054688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,float16,0,3.168058713277181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,fp8,0,3.2721227010091147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,128,0,1,fp8,fp8,0,2.6982666651407876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,128,0,1,fp8,fp8,0,2.6801652908325195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,128,0,1,fp8,fp8,0,2.6974452336629233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,fp8,0,3.251493453979492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,float16,0,3.3098827997843423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,fp8,0,1.6699946721394856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,float16,0,1.6521226565043132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,fp8,0,1.61899201075236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,128,0,1,fp8,fp8,0,1.4833173751831055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,128,0,1,fp8,fp8,0,1.5239413579305012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,float16,0,1.623578707377116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,float16,0,1.6316053072611492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,fp8,0,1.6520907084147136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,float16,0,1.6478506724039714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,fp8,0,1.6472694079081218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,128,0,1,fp8,fp8,0,1.486837387084961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,128,0,1,fp8,fp8,0,1.4820373853047688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,128,0,1,fp8,fp8,0,7.196218490600586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,float16,0,9.440730412801107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,fp8,0,9.391824086507162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,float16,0,9.351674397786459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,128,0,1,fp8,fp8,0,7.413573582967122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,fp8,0,8.937440236409506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,float16,0,9.640864054361979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,fp8,0,9.267866770426432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,float16,0,4.634943962097168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,128,0,1,fp8,fp8,0,7.398805618286133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,fp8,0,5.370725631713867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,128,0,1,fp8,fp8,0,3.9020748138427734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,float16,0,4.496677398681641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,fp8,0,4.346607844034831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,128,0,1,fp8,fp8,0,3.708714803059896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,float16,0,4.799909273783366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,fp8,0,4.60753599802653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,128,0,1,fp8,fp8,0,3.716405232747396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,float16,0,4.545242627461751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,fp8,0,4.901525179545085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,float16,0,2.393610636393229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,128,0,1,fp8,fp8,0,3.7303199768066406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,fp8,0,2.389568010965983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,128,0,1,fp8,fp8,0,2.0615466435750327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,float16,0,2.204298655192057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,fp8,0,2.1875786781311035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,128,0,1,fp8,fp8,0,1.9676052729288738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,float16,0,2.2044320106506348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,fp8,0,2.3278239568074546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,128,0,1,fp8,fp8,0,1.970853328704834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,float16,0,2.210949261983236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,fp8,0,2.2817333539326987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,128,0,1,fp8,fp8,0,2.0055626233418784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,fp8,0,1.310154676437378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,128,0,1,fp8,fp8,0,1.1343573729197185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,float16,0,1.208026647567749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,fp8,0,1.2121013005574544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,128,0,1,fp8,fp8,0,1.1013867060343425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,float16,0,1.229050636291504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,fp8,0,1.2406293551127117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,128,0,1,fp8,fp8,0,1.1052160263061523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,float16,0,1.2017546494801838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,fp8,0,1.2390666802724202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,128,0,1,fp8,fp8,0,1.107466697692871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,float16,0,1.2065013249715169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,128,0,1,fp8,fp8,0,9.71284294128418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,float16,0,11.821904500325521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,fp8,0,12.636783599853516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,float16,0,12.338512420654297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,128,0,1,fp8,fp8,0,9.708143870035807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,fp8,0,12.645744323730469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,float16,0,13.394351959228516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,fp8,0,13.542037963867188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,float16,0,6.786885579427083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,128,0,1,fp8,fp8,0,9.767114639282227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,128,0,1,fp8,fp8,0,5.279205322265625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,fp8,0,7.096506754557292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,float16,0,6.186469395955403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,fp8,0,5.762687683105469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,128,0,1,fp8,fp8,0,4.89902941385905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,float16,0,6.002010981241862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,128,0,1,fp8,fp8,0,4.909701347351074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,fp8,0,6.131445566813151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,float16,0,6.17037836710612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,float16,0,3.2334133783976235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,fp8,0,3.2714560826619468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,128,0,1,fp8,fp8,0,4.928970654805501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,fp8,0,6.231599807739258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,128,0,1,fp8,fp8,0,2.709184010823568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,float16,0,2.8912693659464517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,fp8,0,2.81608517964681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,128,0,1,fp8,fp8,0,2.5469013849894204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,float16,0,2.9273866017659507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,128,0,1,fp8,fp8,0,2.5494240125020347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,float16,0,3.042997360229492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,fp8,0,3.010101318359375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,float16,0,1.6730507214864094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,128,0,1,fp8,fp8,0,2.5556480089823403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,fp8,0,3.112565358479818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,fp8,0,1.5782507260640461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,128,0,1,fp8,fp8,0,1.6190667152404785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,fp8,0,1.515077273050944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,128,0,1,fp8,fp8,0,1.3631839752197266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,float16,0,1.4942879676818848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,fp8,0,1.5183199246724446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,128,0,1,fp8,fp8,0,1.3589332898457844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,float16,0,1.529754638671875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,float16,0,1.4964532852172852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,fp8,0,1.5068906148274739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,float16,0,0.876309315363566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,fp8,0,0.8780372937520345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,float16,0,0.8395093282063802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,fp8,0,0.8386987050374349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,128,0,1,fp8,fp8,0,0.7761279741923014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,float16,0,0.8387893040974935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,fp8,0,0.8405493100484213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,128,0,1,fp8,fp8,0,0.7791253725687662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,float16,0,0.8414719899495443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,fp8,0,0.8383466402689616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,128,0,1,fp8,fp8,0,0.7806986967722574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,128,0,1,fp8,fp8,0,1.3696746826171875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,128,0,1,fp8,fp8,0,0.8114986419677734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,float16,0,7.020336151123047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,128,0,1,fp8,fp8,0,5.933573404947917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,fp8,0,7.213162740071614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,float16,0,7.295743942260742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,128,0,1,fp8,fp8,0,5.948160171508789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,fp8,0,7.068309148152669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,float16,0,7.300154368082683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,fp8,0,7.360975901285808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,float16,0,3.628549257914225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,fp8,0,4.027989387512207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,128,0,1,fp8,fp8,0,6.01420783996582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,128,0,1,fp8,fp8,0,3.2518879572550454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,float16,0,3.5867201487223306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,fp8,0,3.8126398722330728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,128,0,1,fp8,fp8,0,3.017941474914551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,float16,0,3.570309321085612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,fp8,0,3.6058985392252603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,128,0,1,fp8,fp8,0,3.025893211364746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,float16,0,3.449381192525228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,fp8,0,3.546794573465983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,float16,0,1.869215965270996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,128,0,1,fp8,fp8,0,3.05837345123291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,128,0,1,fp8,fp8,0,1.6816693941752117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,float16,0,1.732357343037923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,fp8,0,1.74015474319458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,128,0,1,fp8,fp8,0,1.5810613632202148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,fp8,0,1.940384070078532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,float16,0,1.7459947268168132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,fp8,0,1.77292267481486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,128,0,1,fp8,fp8,0,1.5863359769185383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,float16,0,1.761023998260498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,fp8,0,1.8304533958435059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,128,0,1,fp8,fp8,0,1.595461368560791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,float16,0,0.9842560291290283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,fp8,0,1.062490701675415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,128,0,1,fp8,fp8,0,0.9219786326090494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,float16,0,0.9417386849721273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,fp8,0,0.9440000057220459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,128,0,1,fp8,fp8,0,0.9144799709320068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,float16,0,0.9448053042093912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,128,0,1,fp8,fp8,0,0.8674133618672689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,float16,0,0.9558400313059489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,fp8,0,0.9491466681162516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,128,0,1,fp8,fp8,0,0.8721120357513428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,float16,0,0.5798720121383667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,fp8,0,0.5697333415349325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,128,0,1,fp8,fp8,0,0.5336853265762329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,float16,0,0.5493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,fp8,0,0.5455413262049357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,128,0,1,fp8,fp8,0,0.5068213144938151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,float16,0,0.5532960096995035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,fp8,0,0.5473973353703817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,128,0,1,fp8,fp8,0,0.5070666472117106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,float16,0,0.5534186760584513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,fp8,0,0.5498133500417074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,128,0,1,fp8,fp8,0,0.5109333197275797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,fp8,0,0.9455786546071371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,128,0,1,fp8,fp8,0,6.13102912902832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,float16,0,7.434640248616536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,fp8,0,7.293754577636719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,float16,0,7.012346903483073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,128,0,1,fp8,fp8,0,6.13217035929362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,fp8,0,7.314048131306966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,float16,0,7.3379465738932295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,fp8,0,7.148591995239258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,float16,0,3.8855307896931968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,fp8,0,3.8834241231282554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,128,0,1,fp8,fp8,0,6.186954498291016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,128,0,1,fp8,fp8,0,3.3630507787068686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,float16,0,3.5092267990112305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,fp8,0,3.7804959615071616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,128,0,1,fp8,fp8,0,3.0597333908081055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,float16,0,3.441072146097819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,fp8,0,3.7184693018595376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,128,0,1,fp8,fp8,0,3.0793279012044272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,float16,0,3.5429280598958335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,fp8,0,3.5745598475138345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,float16,0,1.9184746742248535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,128,0,1,fp8,fp8,0,3.101050694783529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,fp8,0,1.9675520261128743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,128,0,1,fp8,fp8,0,1.7336692810058594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,float16,0,1.7342933019002278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,fp8,0,1.7695147196451824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,128,0,1,fp8,fp8,0,1.5788853963216145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,float16,0,1.7531840006510417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,fp8,0,1.7894080479939778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,128,0,1,fp8,fp8,0,1.58733336130778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,float16,0,1.7594985961914062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,fp8,0,1.7933227221171062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,128,0,1,fp8,fp8,0,1.5977813402811687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,float16,0,0.9829013347625732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,128,0,1,fp8,fp8,0,0.9601439634958903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,float16,0,0.9157973130544027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,fp8,0,0.9201227029164633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,128,0,1,fp8,fp8,0,0.8424586455027262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,float16,0,0.9227093060811361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,fp8,0,0.9214186668395996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,128,0,1,fp8,fp8,0,0.8452426592508951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,float16,0,0.9287839730580648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,fp8,0,0.9356693426767985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,128,0,1,fp8,fp8,0,0.8501706918080648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,float16,0,0.558842658996582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,fp8,0,0.5501653353373209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,128,0,1,fp8,fp8,0,0.5104480187098185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,float16,0,0.5113813479741415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,fp8,0,0.5111039876937866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,128,0,1,fp8,fp8,0,0.474181334177653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,float16,0,0.5178613265355428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,fp8,0,0.5144480069478353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,128,0,1,fp8,fp8,0,0.47513067722320557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,float16,0,0.5198346773783366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,fp8,0,0.5189066727956136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,fp8,0,0.9974346955617269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,fp8,0,0.3277440071105957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,128,0,1,fp8,fp8,0,0.3084320028622945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,float16,0,0.30264000097910565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,fp8,0,0.30608532826105755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,float16,0,0.307258665561676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,fp8,0,0.3091573317845662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,128,0,1,fp8,fp8,0,0.477786660194397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,128,0,1,fp8,fp8,0,0.28920533259709674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,float16,0,0.323472003142039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,float16,0,0.31034133831659955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,fp8,0,0.310810665289561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,128,0,1,fp8,fp8,0,0.2919360001881917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,128,0,1,fp8,fp8,0,0.28622400760650635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,128,0,1,fp8,fp8,0,3.9068425496419272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,fp8,0,4.433050791422526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,float16,0,4.475018819173177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,float16,0,4.538026809692383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,128,0,1,fp8,fp8,0,3.9101438522338867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,fp8,0,4.524021466573079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,float16,0,4.477408091227214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,float16,0,2.4926346143086753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,fp8,0,4.590928077697754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,128,0,1,fp8,fp8,0,3.956437428792318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,128,0,1,fp8,fp8,0,2.189472039540609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,float16,0,2.1672852834065757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,fp8,0,2.1421333948771157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,fp8,0,2.38265069325765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,float16,0,2.2133706410725913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,128,0,1,fp8,fp8,0,1.9787146250406902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,128,0,1,fp8,fp8,0,1.9652105967203777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,float16,0,2.1954399744669595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,fp8,0,2.177637259165446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,fp8,0,2.1840160687764487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,float16,0,1.2257386843363445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,fp8,0,1.2318133513132732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,128,0,1,fp8,fp8,0,1.1396693388621013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,float16,0,1.123685359954834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,fp8,0,1.120959997177124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,128,0,1,fp8,fp8,0,1.025381326675415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,float16,0,1.1296640237172444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,fp8,0,1.1262293656667073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,128,0,1,fp8,fp8,0,1.9960479736328125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,float16,0,1.1382346947987874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,fp8,0,1.1389706929524739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,128,0,1,fp8,fp8,0,1.039669354756673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,float16,0,0.6650559902191162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,fp8,0,0.6598399877548218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,float16,0,0.6202506621678671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,fp8,0,0.6024106740951538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,128,0,1,fp8,fp8,0,1.0323572953542073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,128,0,1,fp8,fp8,0,0.55348801612854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,float16,0,0.6035039822260538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,fp8,0,0.6049760182698568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,float16,0,0.6080319881439209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,fp8,0,0.6093013286590576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,128,0,1,fp8,fp8,0,0.6096426645914713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,float16,0,0.3657333453496297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,fp8,0,0.37673600514729816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,float16,0,0.3407839934031169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,fp8,0,0.3405119975407918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,128,0,1,fp8,fp8,0,0.3183679978052775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,float16,0,0.3420960108439128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,128,0,1,fp8,fp8,0,0.5612853368123373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,fp8,0,0.3445813258488973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,128,0,1,fp8,fp8,0,0.3205173412958781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,float16,0,0.34703465302785236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,fp8,0,0.3483039935429891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,128,0,1,fp8,fp8,0,0.3222879966100057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,float16,0,0.22549867630004883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,fp8,0,0.23038933674494425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,128,0,1,fp8,fp8,0,0.34669331709543866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,float16,0,0.20924800634384155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,fp8,0,0.20940800507863364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,128,0,1,fp8,fp8,0,0.19579732418060303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,float16,0,0.21033066511154175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,fp8,0,0.20970666408538818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,128,0,1,fp8,fp8,0,0.19829332828521729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,128,0,1,fp8,fp8,0,0.2139893372853597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,float16,0,0.21033066511154175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,fp8,0,0.21162132422129312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,128,0,1,fp8,fp8,0,0.20124799013137817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,128,0,1,fp8,fp8,0,0.5561013221740723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,float16,0,4.741648038228353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,fp8,0,4.885477383931478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,float16,0,4.7227786382039385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,128,0,1,fp8,fp8,0,4.285663922627767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,128,0,1,fp8,fp8,0,4.316975911458333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,float16,0,4.954490661621094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,fp8,0,4.728944142659505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,fp8,0,4.9009653727213545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,float16,0,2.6693973541259766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,128,0,1,fp8,fp8,0,4.376528104146321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,128,0,1,fp8,fp8,0,2.4292426109313965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,128,0,1,fp8,fp8,0,2.1353972752889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,fp8,0,2.6542399724324546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,float16,0,2.3264427185058594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,fp8,0,2.338853359222412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,float16,0,2.3374133110046387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,128,0,1,fp8,fp8,0,2.147856076558431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,float16,0,2.3491039276123047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,fp8,0,2.387066682179769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,128,0,1,fp8,fp8,0,2.175546646118164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,float16,0,1.3241973718007405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,fp8,0,2.3291093508402505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,fp8,0,1.3586187362670898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,128,0,1,fp8,fp8,0,1.2447199821472168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,float16,0,1.2024319966634114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,fp8,0,1.1952053705851238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,128,0,1,fp8,fp8,0,1.1385546525319417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,float16,0,1.1962292989095051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,fp8,0,1.2020959854125977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,float16,0,1.2162293593088787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,fp8,0,1.213637351989746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,128,0,1,fp8,fp8,0,1.113642692565918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,float16,0,0.6989226341247559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,fp8,0,0.7084106604258219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,float16,0,0.6310666799545288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,fp8,0,0.6312106847763062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,128,0,1,fp8,fp8,0,1.104149341583252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,128,0,1,fp8,fp8,0,0.5796159903208414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,float16,0,0.6332266728083292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,fp8,0,0.6362506548563639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,128,0,1,fp8,fp8,0,0.5847253402074178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,float16,0,0.6372319857279459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,fp8,0,0.6397333145141602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,float16,0,0.37746667861938477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,fp8,0,0.3869280020395915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,128,0,1,fp8,fp8,0,0.35740800698598224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,float16,0,0.34541865189870197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,fp8,0,0.3455359935760498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,128,0,1,fp8,fp8,0,0.320522665977478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,float16,0,0.3454773426055908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,fp8,0,0.35026665528615314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,128,0,1,fp8,fp8,0,0.32175467411677044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,float16,0,0.3522080183029175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,128,0,1,fp8,fp8,0,0.5887786547342936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,128,0,1,fp8,fp8,0,0.6549280087153116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,float16,0,0.224565327167511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,fp8,0,0.22486400604248047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,128,0,1,fp8,fp8,0,0.21065066258112589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,float16,0,0.1976319948832194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,fp8,0,0.19993066787719727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,128,0,1,fp8,fp8,0,0.18832000096638998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,float16,0,0.20138132572174072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,fp8,0,0.20147732893625894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,128,0,1,fp8,fp8,0,0.19147199392318726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,float16,0,0.20250133673350015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,fp8,0,0.20531733830769858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,128,0,1,fp8,fp8,0,0.19344000021616617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,128,0,1,fp8,fp8,0,0.3266506592432658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,128,0,1,fp8,fp8,0,0.13591466347376505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,float16,0,0.1279306709766388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,fp8,0,0.12843199570973715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,128,0,1,fp8,fp8,0,0.12352533141771953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,float16,0,0.12820800145467123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,fp8,0,0.12994666894276938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,fp8,0,0.35197333494822186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,fp8,0,0.14040533701578775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,fp8,0,0.1291306714216868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,128,0,1,fp8,fp8,0,0.12380799651145935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,128,0,1,fp8,fp8,0,0.12385599811871846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,float16,0,0.13060266772905985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,float16,0,3.1234827041625977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,fp8,0,3.1412054697672525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,float16,0,3.1352853775024414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,float16,0,0.13917332887649536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,fp8,0,3.1490186055501304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,128,0,1,fp8,fp8,0,2.893967946370443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,fp8,0,3.2305332819620767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,128,0,1,fp8,fp8,0,2.92847474416097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,128,0,1,fp8,fp8,0,2.880943934122721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,float16,0,1.7719945907592773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,fp8,0,1.8049227396647136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,128,0,1,fp8,fp8,0,1.6605599721272786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,float16,0,1.560106595357259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,fp8,0,1.5786773363749187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,float16,0,1.5713013013203938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,fp8,0,1.5892213185628254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,float16,0,3.2270453770955405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,128,0,1,fp8,fp8,0,1.4397172927856445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,float16,0,1.5945067405700684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,float16,0,0.9138879776000977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,128,0,1,fp8,fp8,0,1.4665973981221516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,fp8,0,0.9278773466746012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,128,0,1,fp8,fp8,0,0.8574720223744711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,float16,0,0.8116532961527506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,fp8,0,0.8119413057963053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,128,0,1,fp8,fp8,0,0.744149367014567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,float16,0,0.8123199939727783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,fp8,0,0.8224319616953532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,128,0,1,fp8,fp8,0,0.7503999869028727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,128,0,1,fp8,fp8,0,1.4542400042215984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,float16,0,0.8226933479309082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,fp8,0,0.8277013301849365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,float16,0,0.4827253421147664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,fp8,0,0.49133865038553876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,fp8,0,1.5921972592671711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,128,0,1,fp8,fp8,0,0.45494401454925537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,float16,0,0.4310826857884725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,fp8,0,0.43200000127156574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,128,0,1,fp8,fp8,0,0.3991039991378784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,float16,0,0.43355735143025714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,fp8,0,0.43597865104675293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,128,0,1,fp8,fp8,0,0.4023520151774089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,128,0,1,fp8,fp8,0,0.7586932977040609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,fp8,0,0.44071467717488605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,128,0,1,fp8,fp8,0,0.4045706590016683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,float16,0,0.2701813379923503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,fp8,0,0.27058132489522296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,128,0,1,fp8,fp8,0,0.2513706684112549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,float16,0,0.23784534136454263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,fp8,0,0.23865065972010294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,128,0,1,fp8,fp8,0,0.2249386707941691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,fp8,0,0.2404586672782898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,128,0,1,fp8,fp8,0,0.22616000970204672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,float16,0,0.2424266735712687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,float16,0,0.4376213153203328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,128,0,1,fp8,fp8,0,0.22841066122055054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,float16,0,0.1580586632092794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,fp8,0,0.16064533591270447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,128,0,1,fp8,fp8,0,0.1509760022163391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,float16,0,0.13854400316874185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,fp8,0,0.1388213336467743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,128,0,1,fp8,fp8,0,0.12947199741999307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,float16,0,0.14009066422780356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,float16,0,0.24015466372172037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,128,0,1,fp8,fp8,0,0.13226667046546936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,float16,0,0.14014933506647745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,fp8,0,0.14286933342615762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,128,0,1,fp8,fp8,0,0.13543466726938883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,float16,0,0.10041067004203796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,fp8,0,0.10193600257237752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,128,0,1,fp8,fp8,0,0.09898133079210918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,float16,0,0.09503466884295146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,fp8,0,0.09528000156084697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,128,0,1,fp8,fp8,0,0.09099200367927551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,float16,0,0.09545066952705383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,fp8,0,0.09471999605496724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,fp8,0,0.13913066188494363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,fp8,0,0.09521599610646565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,fp8,0,0.24732265869776407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,128,0,1,fp8,fp8,0,0.09283199906349182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,128,0,1,fp8,fp8,0,0.09090666969617207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,float16,0,3.421104113260905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,float16,0,0.09522133072217305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,fp8,0,3.439039866129557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,128,0,1,fp8,fp8,0,3.5038986206054688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,float16,0,3.451690673828125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,fp8,0,3.458869298299154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,128,0,1,fp8,fp8,0,3.467733383178711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,float16,0,3.487205187479655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,fp8,0,3.529621442159017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,float16,0,2.020410696665446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,fp8,0,1.9584213892618816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,128,0,1,fp8,fp8,0,3.5481974283854165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,128,0,1,fp8,fp8,0,1.9615306854248047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,float16,0,1.6620052655537922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,fp8,0,1.6685120264689128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,128,0,1,fp8,fp8,0,1.707274595896403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,float16,0,1.6728906631469727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,128,0,1,fp8,fp8,0,1.6967360178629558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,float16,0,1.6985333760579426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,fp8,0,1.7053546905517578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,128,0,1,fp8,fp8,0,1.7392479578653972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,fp8,0,0.9716373284657797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,128,0,1,fp8,fp8,0,0.9839413166046143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,float16,0,0.9830880165100098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,float16,0,0.8447413444519043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,fp8,0,0.847488005956014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,128,0,1,fp8,fp8,0,0.817898670832316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,float16,0,0.8473066488901774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,fp8,0,0.8502240180969238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,128,0,1,fp8,fp8,0,0.825434684753418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,float16,0,0.8671786785125732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,fp8,0,1.6730880737304688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,128,0,1,fp8,fp8,0,0.8564533392588297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,fp8,0,0.5004426638285319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,128,0,1,fp8,fp8,0,0.5058613220850626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,float16,0,0.43701334794362384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,fp8,0,0.43937599658966064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,128,0,1,fp8,fp8,0,0.42158933480580646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,float16,0,0.43676801522572833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,fp8,0,0.4378186861673991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,fp8,0,0.8637173175811768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,128,0,1,fp8,fp8,0,0.4245493412017822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,float16,0,0.44810132185618085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,fp8,0,0.4459679921468099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,float16,0,0.2712693413098653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,fp8,0,0.26294400294621784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,128,0,1,fp8,fp8,0,0.2674506704012553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,float16,0,0.23279466231664023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,fp8,0,0.23411200443903604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,128,0,1,fp8,fp8,0,0.22257065773010254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,float16,0,0.2351093292236328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,fp8,0,0.23433067401250204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,128,0,1,fp8,fp8,0,0.22454933325449625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,float16,0,0.23849066098531088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,fp8,0,0.2389706571896871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,128,0,1,fp8,fp8,0,0.23300800720850626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,float16,0,0.15169066190719604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,fp8,0,0.14737066626548767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,128,0,1,fp8,fp8,0,0.14828800161679587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,float16,0,0.12763733665148416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,fp8,0,0.12839999794960022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,128,0,1,fp8,fp8,0,0.1218239963054657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,float16,0,0.12873599926630655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,fp8,0,0.12882133324941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,128,0,1,fp8,fp8,0,0.1239520013332367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,float16,0,0.13206400473912558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,fp8,0,0.132042666276296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,128,0,1,fp8,fp8,0,0.12925333778063455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,float16,0,0.08805333574612935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,float16,0,0.5077866713205973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,128,0,1,fp8,fp8,0,0.08927466471989949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,float16,0,0.0777706652879715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,128,0,1,fp8,fp8,0,0.07197333375612895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,float16,0,0.0761653333902359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,fp8,0,0.07561600208282471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,128,0,1,fp8,fp8,0,0.07135466734568278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,float16,0,0.07648533085982005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,fp8,0,0.07753066718578339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,fp8,0,0.08523733417193095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,128,0,1,fp8,fp8,0,0.4424266815185547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,float16,0,0.052842666705449425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,fp8,0,0.0775733341773351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,128,0,1,fp8,fp8,0,0.05193600058555603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,float16,0,0.05273599922657013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,fp8,0,0.052095999320348106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,128,0,1,fp8,fp8,0,0.04961066444714864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,float16,0,0.05203199883302053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,fp8,0,0.05269333223501841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,128,0,1,fp8,fp8,0,0.04823466638724009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,fp8,0,0.05188799897829691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,128,0,1,fp8,fp8,0,0.04833599925041199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,128,0,1,fp8,fp8,0,0.07272533575693767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,fp8,0,0.054714664816856384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,float16,0,0.052229334910710655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,128,0,1,float16,float16,0,2.931253433227539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,128,0,1,float16,fp8,0,2.953141212463379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,128,0,1,fp8,fp8,0,3.009317398071289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,128,0,1,float16,float16,0,2.930373191833496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,128,0,1,float16,fp8,0,2.9678878784179688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,128,0,1,fp8,fp8,0,3.0181760787963867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,128,0,1,float16,float16,0,3.020085334777832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,128,0,1,float16,fp8,0,3.0354719161987305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,128,0,1,float16,fp8,0,1.7099253336588542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,128,0,1,fp8,fp8,0,1.7373919486999512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,128,0,1,float16,float16,0,1.4087146123250325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,128,0,1,float16,float16,0,1.7309813499450684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,128,0,1,fp8,fp8,0,3.088816006978353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,128,0,1,fp8,fp8,0,1.4413866996765137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,128,0,1,float16,float16,0,1.4176160494486492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,128,0,1,float16,fp8,0,1.4349120457967122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,128,0,1,float16,float16,0,1.4510773022969563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,128,0,1,float16,fp8,0,1.423749287923177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,128,0,1,float16,fp8,0,1.4689280192057292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,128,0,1,float16,float16,0,0.8515466849009196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,128,0,1,fp8,fp8,0,1.5132533709208171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,128,0,1,float16,fp8,0,0.8405120372772217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,128,0,1,fp8,fp8,0,1.486421267191569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,128,0,1,fp8,fp8,0,0.8669866720835367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,128,0,1,float16,fp8,0,0.7205279668172201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,128,0,1,fp8,fp8,0,0.7009066740671793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,128,0,1,float16,float16,0,0.7200106779734293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,128,0,1,float16,fp8,0,0.7230933507283529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,128,0,1,fp8,fp8,0,0.7069973150889078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,128,0,1,float16,float16,0,0.7367253303527832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,128,0,1,float16,fp8,0,0.7380426724751791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,128,0,1,float16,float16,0,0.7177226543426514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,128,0,1,fp8,fp8,0,0.7391146818796793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,128,0,1,float16,fp8,0,0.4299199978510539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,128,0,1,fp8,fp8,0,0.44514667987823486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,128,0,1,float16,float16,0,0.37061866124471027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,128,0,1,float16,fp8,0,0.3703999916712443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,128,0,1,fp8,fp8,0,0.3596373399098714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,128,0,1,float16,float16,0,0.37114667892456055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,128,0,1,float16,fp8,0,0.37165868282318115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,128,0,1,fp8,fp8,0,0.3631093502044678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,128,0,1,float16,float16,0,0.43829866250356037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,128,0,1,float16,fp8,0,0.3800266583760579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,128,0,1,fp8,fp8,0,0.37931732336680096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,128,0,1,float16,fp8,0,0.2304853399594625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,128,0,1,float16,float16,0,0.20012799898783365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,128,0,1,float16,fp8,0,0.1990613341331482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,128,0,1,fp8,fp8,0,0.19171200195948282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,128,0,1,float16,float16,0,0.3824160099029541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,128,0,1,float16,fp8,0,0.2002506653467814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,128,0,1,float16,float16,0,0.23480532566706339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,128,0,1,fp8,fp8,0,0.19311465819676718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,128,0,1,float16,float16,0,0.20960533618927002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,128,0,1,float16,fp8,0,0.20303465922673544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,128,0,1,fp8,fp8,0,0.20736000935236612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,128,0,1,float16,float16,0,0.12847999731699625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,128,0,1,float16,fp8,0,0.12480533123016357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,128,0,1,fp8,fp8,0,0.12924266854921976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,128,0,1,float16,float16,0,0.10601600011189778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,128,0,1,float16,fp8,0,0.10667199889818828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,128,0,1,fp8,fp8,0,0.10429867108662923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,128,0,1,float16,float16,0,0.10814399520556132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,128,0,1,float16,float16,0,0.19987734158833823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,128,0,1,fp8,fp8,0,0.1053013304869334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,128,0,1,float16,float16,0,0.11171733339627583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,128,0,1,float16,fp8,0,0.11036800344785054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,128,0,1,fp8,fp8,0,0.11198400457700093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,128,0,1,float16,float16,0,0.07436800003051758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,128,0,1,float16,fp8,0,0.07334933181603749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,128,0,1,fp8,fp8,0,0.0795253316561381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,128,0,1,float16,float16,0,0.06457599997520447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,128,0,1,fp8,fp8,0,0.2362133264541626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,128,0,1,fp8,fp8,0,0.060565332571665444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,128,0,1,float16,float16,0,0.06486399968465169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,128,0,1,float16,fp8,0,0.06435200075308482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,128,0,1,fp8,fp8,0,0.06039466460545858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,128,0,1,float16,float16,0,0.06638933221499126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,128,0,1,float16,fp8,0,0.06495466828346252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,128,0,1,fp8,fp8,0,0.06357866525650024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,128,0,1,float16,float16,0,0.04614399870236715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,128,0,1,float16,fp8,0,0.04603200157483419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,128,0,1,fp8,fp8,0,0.04530133306980133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,128,0,1,float16,float16,0,0.04381333291530609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,128,0,1,float16,fp8,0,0.043840001026789345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,128,0,1,float16,float16,0,0.04400533437728882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,128,0,1,float16,fp8,0,0.04372799893220266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,128,0,1,float16,fp8,0,0.06450133522351582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,128,0,1,float16,float16,0,0.043791999419530235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,128,0,1,float16,fp8,0,0.04377600053946177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,128,0,1,fp8,fp8,0,0.040847999354203544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,128,0,1,float16,float16,0,0.03186666717131933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,128,0,1,float16,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,128,0,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,128,0,1,fp8,fp8,0,0.04042666653792063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,128,0,1,float16,float16,0,0.03126933425664902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,128,0,1,float16,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,128,0,1,fp8,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,128,0,1,float16,float16,0,0.03065599997838338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,128,0,1,float16,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,128,0,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,128,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,128,0,1,float16,fp8,0,0.031557333966096245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,128,0,1,fp8,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,128,0,1,float16,fp8,0,0.10828266541163127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,128,0,1,float16,float16,0,1.2763786315917969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,128,0,1,float16,fp8,0,1.2778720060984294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,128,0,1,fp8,fp8,0,1.313909371693929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,128,0,1,float16,float16,0,1.279205322265625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,128,0,1,float16,fp8,0,1.2838186422983806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,128,0,1,fp8,fp8,0,1.3840053876241047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,128,0,1,float16,float16,0,1.3221440315246582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,128,0,1,float16,fp8,0,1.3279146353403728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,128,0,1,fp8,fp8,0,0.04033066580692927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,128,0,1,float16,float16,0,0.7810133298238119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,128,0,1,float16,fp8,0,0.7572000026702881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,128,0,1,fp8,fp8,0,0.8094720045725504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,128,0,1,float16,fp8,0,0.6473226547241211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,128,0,1,fp8,fp8,0,0.6345066626866659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,128,0,1,float16,float16,0,0.6477440198262533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,128,0,1,float16,fp8,0,0.648090680440267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,128,0,1,fp8,fp8,0,0.6401973168055216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,128,0,1,float16,float16,0,0.6649173498153687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,128,0,1,float16,fp8,0,0.6640479962031046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,128,0,1,float16,float16,0,0.40436800320943195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,128,0,1,float16,fp8,0,0.39417068163553876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,128,0,1,fp8,fp8,0,0.4145706494649251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,128,0,1,float16,float16,0,0.33315199613571167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,128,0,1,float16,float16,0,0.6500320037206014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,128,0,1,float16,fp8,0,0.3355626662572225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,128,0,1,fp8,fp8,0,0.3271413246790568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,128,0,1,float16,float16,0,0.33617599805196124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,128,0,1,fp8,fp8,0,1.374538739522298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,128,0,1,float16,fp8,0,0.33583998680114746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,128,0,1,fp8,fp8,0,0.3315093318621318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,128,0,1,float16,float16,0,0.34548266728719074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,128,0,1,float16,fp8,0,0.34281599521636963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,128,0,1,fp8,fp8,0,0.6689919630686442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,128,0,1,float16,float16,0,0.21454399824142456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,128,0,1,float16,fp8,0,0.20882133642832437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,128,0,1,fp8,fp8,0,0.21869866053263345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,128,0,1,float16,float16,0,0.1786293387413025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,128,0,1,float16,fp8,0,0.1778293251991272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,128,0,1,fp8,fp8,0,0.17617599169413248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,128,0,1,float16,float16,0,0.17965867122014365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,128,0,1,float16,fp8,0,0.18090667327245077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,128,0,1,fp8,fp8,0,0.1755519906679789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,128,0,1,float16,float16,0,0.18436266978581747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,128,0,1,float16,fp8,0,0.18426666657129923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,128,0,1,float16,float16,0,0.1204266647497813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,128,0,1,float16,fp8,0,0.11674666404724121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,128,0,1,fp8,fp8,0,0.12185066938400269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,128,0,1,float16,float16,0,0.09796266754468282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,128,0,1,float16,fp8,0,0.10249066352844238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,128,0,1,fp8,fp8,0,0.09502399961153667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,128,0,1,float16,float16,0,0.09940800070762634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,128,0,1,float16,fp8,0,0.10126933455467224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,128,0,1,fp8,fp8,0,0.09744000434875488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,128,0,1,float16,float16,0,0.10326932867368062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,128,0,1,float16,fp8,0,0.10301867127418518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,128,0,1,fp8,fp8,0,0.10354133447011311
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,128,0,1,float16,float16,0,0.06832533578077953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,128,0,1,float16,fp8,0,0.06609599788983662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,128,0,1,fp8,fp8,0,0.07253866891066234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,128,0,1,float16,float16,0,0.05807466804981232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,128,0,1,float16,fp8,0,0.05816533168156942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,128,0,1,fp8,fp8,0,0.34510934352874756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,128,0,1,float16,fp8,0,0.058506667613983154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,128,0,1,fp8,fp8,0,0.05596266686916351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,128,0,1,float16,float16,0,0.05964800218741099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,128,0,1,float16,fp8,0,0.06001600126425425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,128,0,1,fp8,fp8,0,0.055061335364977516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,128,0,1,float16,float16,0,0.04178666571776072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,128,0,1,float16,fp8,0,0.04365866879622141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,128,0,1,fp8,fp8,0,0.18382932742436728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,128,0,1,float16,float16,0,0.058261334896087646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,128,0,1,float16,float16,0,0.04038933416207632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,128,0,1,float16,fp8,0,0.03989866624275843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,128,0,1,fp8,fp8,0,0.03813866774241129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,128,0,1,float16,fp8,0,0.04027199993530909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,128,0,1,fp8,fp8,0,0.03828266759713491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,128,0,1,float16,float16,0,0.04005866746107737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,128,0,1,float16,fp8,0,0.040591999888420105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,128,0,1,fp8,fp8,0,0.03805333375930786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,128,0,1,float16,float16,0,0.029872000217437744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,128,0,1,float16,fp8,0,0.030058667063713074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,128,0,1,fp8,fp8,0,0.03001066545645396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,128,0,1,float16,float16,0,0.03988266736268997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,128,0,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,128,0,1,fp8,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,128,0,1,float16,float16,0,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,128,0,1,float16,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,128,0,1,fp8,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,128,0,1,float16,float16,0,0.028933333853880566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,128,0,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,128,0,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,128,0,1,float16,float16,0,0.02362666775782903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,128,0,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,128,0,1,float16,float16,0,0.027765333652496338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,128,0,1,fp8,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,128,0,1,float16,float16,0,0.023631999890009563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,128,0,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,128,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,128,0,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,128,0,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,128,0,1,fp8,fp8,0,0.022682666778564453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,128,0,1,float16,float16,0,0.023541333774725597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,128,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,128,0,1,fp8,fp8,0,0.053904001911481224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,128,0,1,float16,float16,0,0.7004853089650472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,128,0,1,float16,fp8,0,0.7007733186086019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,128,0,1,fp8,fp8,0,0.6974826653798422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,128,0,1,float16,float16,0,0.7056053479512533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,128,0,1,float16,fp8,0,0.7050027052561442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,128,0,1,fp8,fp8,0,0.04305600126584371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,128,0,1,fp8,fp8,0,0.702128012975057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,128,0,1,float16,float16,0,0.7191680272420248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,128,0,1,float16,fp8,0,0.7167680263519287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,128,0,1,float16,float16,0,0.42667198181152344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,128,0,1,float16,fp8,0,0.43000535170237225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,128,0,1,fp8,fp8,0,0.45262400309244794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,128,0,1,float16,fp8,0,0.3579519987106323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,128,0,1,fp8,fp8,0,0.355679988861084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,128,0,1,float16,float16,0,0.35929067929585773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,128,0,1,float16,fp8,0,0.35875733693440753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,128,0,1,fp8,fp8,0,0.35869332154591876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,128,0,1,float16,float16,0,0.3686293363571167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,128,0,1,float16,fp8,0,0.3682560125986735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,128,0,1,fp8,fp8,0,0.7248053550720215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,128,0,1,fp8,fp8,0,0.3710506757100423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,128,0,1,float16,float16,0,0.35947199662526447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,128,0,1,float16,fp8,0,0.21968533595403036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,128,0,1,fp8,fp8,0,0.23077332973480225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,128,0,1,float16,float16,0,0.18952532609303793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,128,0,1,float16,fp8,0,0.18998932838439941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,128,0,1,fp8,fp8,0,0.18858667214711508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,128,0,1,float16,float16,0,0.1913493275642395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,128,0,1,float16,fp8,0,0.19819732507069907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,128,0,1,fp8,fp8,0,0.1888479987780253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,128,0,1,float16,float16,0,0.19425066312154135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,128,0,1,float16,fp8,0,0.19408533970514932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,128,0,1,fp8,fp8,0,0.19555199146270752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,128,0,1,float16,float16,0,0.22472000122070312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,128,0,1,fp8,fp8,0,0.12652800480524698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,128,0,1,float16,float16,0,0.10382399956385295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,128,0,1,float16,fp8,0,0.10436800122261047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,128,0,1,fp8,fp8,0,0.10142933328946431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,128,0,1,float16,float16,0,0.10545600454012553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,128,0,1,fp8,fp8,0,0.10313600301742554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,128,0,1,float16,float16,0,0.1065120001633962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,128,0,1,float16,fp8,0,0.11968533198038737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,128,0,1,fp8,fp8,0,0.1074773371219635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,128,0,1,float16,float16,0,0.07050666709740956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,128,0,1,float16,fp8,0,0.0697386662165324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,128,0,1,fp8,fp8,0,0.07444799939791362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,128,0,1,float16,float16,0,0.060405333836873375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,128,0,1,float16,fp8,0,0.06032533446947733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,128,0,1,float16,fp8,0,0.10365866621335347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,128,0,1,fp8,fp8,0,0.057946667075157166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,128,0,1,float16,float16,0,0.06000000238418579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,128,0,1,float16,fp8,0,0.06020799775918325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,128,0,1,fp8,fp8,0,0.058186665177345276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,128,0,1,float16,float16,0,0.06028266747792562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,128,0,1,float16,fp8,0,0.06226666768391927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,128,0,1,fp8,fp8,0,0.057850668827692665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,128,0,1,float16,float16,0,0.0395413339138031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,128,0,1,float16,fp8,0,0.0399893323580424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,128,0,1,fp8,fp8,0,0.0403466671705246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,128,0,1,float16,float16,0,0.038149334490299225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,128,0,1,float16,fp8,0,0.0379573330283165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,128,0,1,fp8,fp8,0,0.03718400001525879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,128,0,1,float16,float16,0,0.03774933268626531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,128,0,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,128,0,1,fp8,fp8,0,0.037434667348861694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,128,0,1,float16,float16,0,0.037845333417256675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,128,0,1,float16,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,128,0,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,128,0,1,float16,float16,0,0.02940800040960312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,128,0,1,float16,fp8,0,0.03062933435042699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,128,0,1,fp8,fp8,0,0.02880000074704488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,128,0,1,float16,float16,0,0.029802667597929638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,128,0,1,float16,fp8,0,0.029306667546431225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,128,0,1,fp8,fp8,0,0.0276853342851003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,128,0,1,float16,float16,0,0.029498666524887085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,128,0,1,float16,fp8,0,0.028234665592511494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,128,0,1,fp8,fp8,0,0.027066667874654133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,128,0,1,float16,float16,0,0.02788266787926356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,128,0,1,float16,fp8,0,0.028501334289709728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,128,0,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,128,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,128,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,128,0,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,128,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,128,0,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,128,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,128,0,1,float16,fp8,0,0.020842666427294414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,128,0,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,128,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,128,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,128,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,128,0,1,float16,fp8,0,0.018725333114465077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,128,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,128,0,1,fp8,fp8,0,0.017738666385412216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,128,0,1,fp8,fp8,0,0.017968000223239262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,128,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,128,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,128,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,128,0,1,float16,float16,0,0.45074665546417236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,128,0,1,fp8,fp8,0,0.45286401112874347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,128,0,1,float16,float16,0,0.4532746473948161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,128,0,1,float16,fp8,0,0.4542613426844279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,128,0,1,float16,fp8,0,0.4508266846338908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,128,0,1,fp8,fp8,0,0.4607359965642293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,128,0,1,float16,float16,0,0.46186665693918866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,128,0,1,float16,fp8,0,0.46251734097798664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,128,0,1,fp8,fp8,0,0.4851040045420329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,128,0,1,float16,float16,0,0.26763733228047687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,128,0,1,float16,fp8,0,0.26604799429575604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,128,0,1,fp8,fp8,0,0.2789439956347148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,128,0,1,float16,float16,0,0.23656533161799112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,128,0,1,float16,fp8,0,0.24230400721232095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,128,0,1,fp8,fp8,0,0.2353066603342692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,128,0,1,float16,float16,0,0.2359679937362671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,128,0,1,float16,fp8,0,0.23636800050735474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,128,0,1,fp8,fp8,0,0.23454399903615317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,128,0,1,float16,float16,0,0.24154132604599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,128,0,1,float16,float16,0,0.1209333340326945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,128,0,1,float16,fp8,0,0.2409813404083252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,128,0,1,fp8,fp8,0,0.24292266368865967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,128,0,1,float16,fp8,0,0.14165332913398743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,128,0,1,fp8,fp8,0,0.15016532937685648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,128,0,1,float16,float16,0,0.1279146671295166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,128,0,1,float16,fp8,0,0.1276479959487915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,128,0,1,fp8,fp8,0,0.12410133083661397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,128,0,1,float16,fp8,0,0.10564800103505452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,128,0,1,float16,float16,0,0.12633066376050314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,128,0,1,float16,fp8,0,0.12802132964134216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,128,0,1,float16,float16,0,0.12874666849772134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,128,0,1,float16,fp8,0,0.13026666641235352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,128,0,1,float16,float16,0,0.08179733157157898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,128,0,1,float16,fp8,0,0.08100800216197968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,128,0,1,fp8,fp8,0,0.08739200234413147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,128,0,1,float16,float16,0,0.07218666871388753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,128,0,1,float16,fp8,0,0.07181333502133687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,128,0,1,fp8,fp8,0,0.06917333106199901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,128,0,1,fp8,fp8,0,0.12589866916338602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,128,0,1,float16,float16,0,0.14375999569892883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,128,0,1,float16,fp8,0,0.0726453314224879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,128,0,1,fp8,fp8,0,0.13179733355840048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,128,0,1,float16,float16,0,0.07292266686757405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,128,0,1,float16,fp8,0,0.07243200143178304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,128,0,1,fp8,fp8,0,0.07049599786599477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,128,0,1,float16,fp8,0,0.04605866471926371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,128,0,1,fp8,fp8,0,0.04761599997679392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,128,0,1,float16,float16,0,0.045610666275024414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,128,0,1,float16,fp8,0,0.04565866788228353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,128,0,1,fp8,fp8,0,0.0440533310174942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,128,0,1,float16,float16,0,0.04586133360862732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,128,0,1,float16,float16,0,0.07231466472148895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,128,0,1,float16,fp8,0,0.043882668018341064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,128,0,1,fp8,fp8,0,0.04288533329963684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,128,0,1,float16,float16,0,0.04412800073623657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,128,0,1,float16,fp8,0,0.04448533554871877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,128,0,1,fp8,fp8,0,0.043951998154322304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,128,0,1,float16,float16,0,0.04740266501903534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,128,0,1,float16,fp8,0,0.03146133323510488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,128,0,1,fp8,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,128,0,1,float16,float16,0,0.031557333966096245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,128,0,1,float16,fp8,0,0.029391999046007793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,128,0,1,fp8,fp8,0,0.030293333033720653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,128,0,1,float16,float16,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,128,0,1,float16,fp8,0,0.031189332405726116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,128,0,1,fp8,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,128,0,1,float16,float16,0,0.03125333289305369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,128,0,1,float16,fp8,0,0.03130666663249334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,128,0,1,fp8,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,128,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,128,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,128,0,1,fp8,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,128,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,128,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,128,0,1,fp8,fp8,0,0.02367999901374181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,128,0,1,float16,float16,0,0.022346665461858112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,128,0,1,float16,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,128,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,128,0,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,128,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,128,0,1,fp8,fp8,0,0.022656001150608063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,128,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,128,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,128,0,1,float16,float16,0,0.018229333062966663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,128,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,128,0,1,fp8,fp8,0,0.01817600056529045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,128,0,1,float16,fp8,0,0.018746666610240936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,128,0,1,float16,float16,0,0.018794666975736618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,128,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,128,0,1,fp8,fp8,0,0.016271999726692837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,128,0,1,float16,float16,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,128,0,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,128,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,128,0,1,float16,float16,0,0.332479993502299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,128,0,1,float16,fp8,0,0.3306933244069417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,128,0,1,fp8,fp8,0,0.0687253326177597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,128,0,1,float16,float16,0,0.3331200083096822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,128,0,1,float16,float16,0,0.03146666785081228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,128,0,1,float16,fp8,0,0.33211199442545575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,128,0,1,fp8,fp8,0,0.3351733287175496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,128,0,1,float16,float16,0,0.3351253271102905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,128,0,1,float16,fp8,0,0.33504001299540204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,128,0,1,fp8,fp8,0,0.34356268246968585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,128,0,1,float16,float16,0,0.19132800896962485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,128,0,1,float16,fp8,0,0.18953599532445273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,128,0,1,fp8,fp8,0,0.1992853283882141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,128,0,1,float16,float16,0,0.17437867323557535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,128,0,1,float16,fp8,0,0.1751520037651062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,128,0,1,fp8,fp8,0,0.3366026480992635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,128,0,1,float16,float16,0,0.17492266496022543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,128,0,1,float16,fp8,0,0.17332800229390463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,128,0,1,fp8,fp8,0,0.17493333419164023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,128,0,1,float16,float16,0,0.17584532499313354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,128,0,1,float16,fp8,0,0.17553067207336426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,128,0,1,fp8,fp8,0,0.18127467234929404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,128,0,1,float16,float16,0,0.1034399966398875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,128,0,1,float16,fp8,0,0.10319999853769939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,128,0,1,fp8,fp8,0,0.11049600442250569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,128,0,1,float16,float16,0,0.09521599610646565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,128,0,1,float16,fp8,0,0.09531199932098389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,128,0,1,fp8,fp8,0,0.09319999814033508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,128,0,1,fp8,fp8,0,0.17417067289352417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,128,0,1,float16,fp8,0,0.09519466757774353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,128,0,1,fp8,fp8,0,0.09330667058626811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,128,0,1,float16,fp8,0,0.09725333253542583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,128,0,1,fp8,fp8,0,0.09460799892743428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,128,0,1,float16,float16,0,0.05861333509286245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,128,0,1,fp8,fp8,0,0.0584853341182073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,128,0,1,float16,float16,0,0.0561653325955073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,128,0,1,float16,fp8,0,0.05657066901524862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,128,0,1,float16,float16,0,0.09604266285896301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,128,0,1,float16,float16,0,0.055957332253456116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,128,0,1,float16,fp8,0,0.056159997979799904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,128,0,1,fp8,fp8,0,0.056234667698542275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,128,0,1,float16,float16,0,0.05633600056171417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,128,0,1,float16,fp8,0,0.05648533503214518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,128,0,1,float16,fp8,0,0.0590826670328776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,128,0,1,float16,float16,0,0.03807999938726425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,128,0,1,float16,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,128,0,1,fp8,fp8,0,0.05624533196290334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,128,0,1,float16,float16,0,0.037151999771595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,128,0,1,float16,fp8,0,0.03773866593837738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,128,0,1,fp8,fp8,0,0.03779733429352442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,128,0,1,float16,float16,0,0.037962667644023895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,128,0,1,float16,fp8,0,0.03797333439191183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,128,0,1,fp8,fp8,0,0.037530665596326195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,128,0,1,fp8,fp8,0,0.05597866574923197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,128,0,1,float16,float16,0,0.03759466608365377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,128,0,1,float16,fp8,0,0.037418665985266365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,128,0,1,fp8,fp8,0,0.037130666275819145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,128,0,1,float16,float16,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,128,0,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,128,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,128,0,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,128,0,1,float16,fp8,0,0.02498133232196172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,128,0,1,fp8,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,128,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,128,0,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,128,0,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,128,0,1,float16,float16,0,0.025663999219735462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,128,0,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,128,0,1,fp8,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,128,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,128,0,1,float16,fp8,0,0.020341333001852036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,128,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,128,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,128,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,128,0,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,128,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,128,0,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,128,0,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,128,0,1,float16,float16,0,0.019600000232458115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,128,0,1,float16,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,128,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,128,0,1,float16,float16,0,0.016303999970356624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,128,0,1,float16,fp8,0,0.017701332767804463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,128,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,128,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,128,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,128,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,128,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,128,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,128,0,1,float16,float16,0,0.2773386637369792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,128,0,1,float16,fp8,0,0.27613866329193115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,128,0,1,fp8,fp8,0,0.2775306701660156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,128,0,1,float16,float16,0,0.282970666885376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,128,0,1,float16,float16,0,0.09513066212336223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,128,0,1,float16,fp8,0,0.2760319908459981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,128,0,1,fp8,fp8,0,0.27960532903671265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,128,0,1,float16,float16,0,0.279258668422699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,128,0,1,float16,fp8,0,0.2792053421338399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,128,0,1,fp8,fp8,0,0.2862933278083801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,128,0,1,float16,float16,0,0.15424000223477682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,128,0,1,float16,fp8,0,0.1546293298403422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,128,0,1,fp8,fp8,0,0.16246933738390604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,128,0,1,float16,float16,0,0.14672533671061197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,128,0,1,float16,fp8,0,0.14639467000961304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,128,0,1,fp8,fp8,0,0.14498666922251383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,128,0,1,float16,float16,0,0.14640532930692038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,128,0,1,float16,fp8,0,0.14638933539390564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,128,0,1,fp8,fp8,0,0.14519466956456503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,128,0,1,float16,float16,0,0.14659733573595682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,128,0,1,float16,fp8,0,0.14803199966748556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,128,0,1,fp8,fp8,0,0.1474506656328837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,128,0,1,float16,float16,0,0.08430932958920796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,128,0,1,float16,fp8,0,0.0848479966322581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,128,0,1,fp8,fp8,0,0.08603200316429138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,128,0,1,float16,float16,0,0.0824480007092158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,128,0,1,float16,fp8,0,0.08092799782752991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,128,0,1,fp8,fp8,0,0.08067733546098073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,128,0,1,float16,float16,0,0.08115733166535695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,128,0,1,float16,fp8,0,0.08266666531562805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,128,0,1,fp8,fp8,0,0.08043733239173889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,128,0,1,float16,fp8,0,0.08183466891447704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,128,0,1,fp8,fp8,0,0.08080000181992848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,128,0,1,float16,float16,0,0.050213331977526345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,128,0,1,float16,fp8,0,0.05189333359400431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,128,0,1,fp8,fp8,0,0.05202666421731313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,128,0,1,float16,float16,0,0.04975466430187225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,128,0,1,float16,fp8,0,0.05017066498597463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,128,0,1,fp8,fp8,0,0.04901866614818573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,128,0,1,float16,float16,0,0.0498986691236496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,128,0,1,float16,fp8,0,0.050111999114354454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,128,0,1,fp8,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,128,0,1,float16,float16,0,0.049642667174339294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,128,0,1,float16,fp8,0,0.04967466493447622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,128,0,1,fp8,fp8,0,0.049957334995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,128,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,128,0,1,float16,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,128,0,1,float16,float16,0,0.082997332016627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,128,0,1,float16,float16,0,0.03359466542800268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,128,0,1,float16,fp8,0,0.03369600077470144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,128,0,1,fp8,fp8,0,0.031680000325044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,128,0,1,float16,float16,0,0.033514666060606636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,128,0,1,float16,fp8,0,0.031871999303499855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,128,0,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,128,0,1,float16,float16,0,0.031727999448776245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,128,0,1,float16,fp8,0,0.03297066688537598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,128,0,1,fp8,fp8,0,0.03140799949566523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,128,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,128,0,1,float16,fp8,0,0.02365333338578542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,128,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,128,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,128,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,128,0,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,128,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,128,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,128,0,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,128,0,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,128,0,1,float16,fp8,0,0.023578666150569916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,128,0,1,fp8,fp8,0,0.0216799999276797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,128,0,1,float16,float16,0,0.020069333414236706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,128,0,1,float16,fp8,0,0.01966399947802226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,128,0,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,128,0,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,128,0,1,fp8,fp8,0,0.020848001043001812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,128,0,1,float16,float16,0,0.020687999824682873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,128,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,128,0,1,float16,float16,0,0.01940800001223882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,128,0,1,fp8,fp8,0,0.03402133285999298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,128,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,128,0,1,float16,float16,0,0.016741332908471424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,128,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,128,0,1,float16,float16,0,0.01580799991885821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,128,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,128,0,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,128,0,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,128,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,128,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,128,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,128,0,1,fp8,fp8,0,0.04887466629346212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,128,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,128,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,128,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,128,0,1,float16,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,128,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,128,0,1,float16,fp8,0,0.01609066625436147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,128,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,128,0,1,float16,float16,0,0.23450666666030884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,128,0,1,fp8,fp8,0,0.22971200942993164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,128,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,128,0,1,float16,float16,0,0.23495999972025552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,128,0,1,float16,fp8,0,0.23468265930811563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,4,128,0,1,fp8,fp8,0,0.2283626596132914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,128,0,1,float16,float16,0,0.23471999168395996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,128,0,1,float16,fp8,0,0.23465599616368613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,8,128,0,1,fp8,fp8,0,0.22850133975346884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,0,0.12384000420570374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,0,0.12408000230789185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,48,128,0,1,fp8,fp8,0,0.12174933155377705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,128,0,1,float16,float16,0,0.12382933497428894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,128,0,1,float16,fp8,0,0.12574932972590128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,2,128,0,1,fp8,fp8,0,0.12141866485277812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,128,0,1,float16,float16,0,0.1243893305460612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,128,0,1,float16,fp8,0,0.1265600025653839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,4,128,0,1,fp8,fp8,0,0.1218826671441396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,128,0,1,float16,float16,0,0.12387733658154805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,128,0,1,float16,fp8,0,0.12639466921488443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,0,0.07021866738796234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,0,0.07087466617425282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,48,2,128,0,1,float16,fp8,0,0.23489065965016684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,128,0,1,float16,float16,0,0.07072533170382182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,128,0,1,float16,fp8,0,0.07049599786599477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,2,128,0,1,fp8,fp8,0,0.06890666484832764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,128,0,1,float16,float16,0,0.07076799869537354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,128,0,1,float16,fp8,0,0.07127466797828674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,128,0,1,float16,float16,0,0.07088533540566762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,128,0,1,float16,fp8,0,0.07076799869537354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,8,128,0,1,fp8,fp8,0,0.0684799998998642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,0,0.04292800029118856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,48,128,0,1,fp8,fp8,0,0.06880000233650208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,0,0.04362666606903076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,48,128,0,1,fp8,fp8,0,0.04196799794832865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,128,0,1,float16,float16,0,0.04378133515516917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,128,0,1,float16,fp8,0,0.04359999795754751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,2,128,0,1,fp8,fp8,0,0.04382933179537455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,128,0,1,float16,float16,0,0.04367466767628988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,128,0,1,float16,fp8,0,0.04515733321507772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,4,128,0,1,fp8,fp8,0,0.041482667128245033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,128,0,1,float16,float16,0,0.043663998444875084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,128,0,1,float16,fp8,0,0.043807998299598694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,48,8,128,0,1,fp8,fp8,0,0.04201066493988037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,0,0.02945599953333537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,48,128,0,1,fp8,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,128,0,1,float16,float16,0,0.029333333174387615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,128,0,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,2,128,0,1,fp8,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,128,0,1,float16,float16,0,0.02959999938805898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,128,0,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,4,128,0,1,fp8,fp8,0,0.029178666571776073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,128,0,1,float16,float16,0,0.02961066613594691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,128,0,1,float16,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,48,8,128,0,1,fp8,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,48,4,128,0,1,fp8,fp8,0,0.06828799843788147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,128,0,1,fp8,fp8,0,0.022656001150608063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,128,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,128,0,1,float16,fp8,0,0.022783999641736347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,2,128,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,128,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,4,128,0,1,fp8,fp8,0,0.022885332504908245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,128,0,1,float16,float16,0,0.021386665602525074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,128,0,1,float16,fp8,0,0.02293333411216736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,8,128,0,1,fp8,fp8,0,0.022426667312781017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,48,128,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,128,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,128,0,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,2,128,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,128,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,128,0,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,4,128,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,128,0,1,float16,float16,0,0.019567999988794327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,128,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,48,8,128,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,48,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,128,0,1,float16,float16,0,0.01666133354107539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,2,128,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,128,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,4,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,128,0,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,128,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,128,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,48,8,128,0,1,fp8,fp8,0,0.12277866403261821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,2,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,128,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,48,8,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,128,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,128,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,8,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,48,4,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,128,0,1,fp8,fp8,0,14.02115249633789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,fp8,0,17.81498082478841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,float16,0,18.669498443603516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,float16,0,18.924901326497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,128,0,1,fp8,fp8,0,14.124411265055338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,fp8,0,19.897215525309246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,float16,0,19.364911397298176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,fp8,0,19.63470967610677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,float16,0,10.007749557495117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,128,0,1,fp8,fp8,0,14.113051096598307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,fp8,0,9.83998934427897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,128,0,1,fp8,fp8,0,7.417941411336263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,128,0,1,fp8,fp8,0,7.098304112752278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,float16,0,9.87939198811849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,fp8,0,9.333871841430664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,float16,0,9.525258382161459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,128,0,1,fp8,fp8,0,7.173445383707683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,fp8,0,9.7969118754069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,float16,0,8.945845286051432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,float16,0,4.979845364888509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,128,0,1,fp8,fp8,0,7.28329594930013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,fp8,0,4.511210759480794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,fp8,0,10.065120061238607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,128,0,1,fp8,fp8,0,3.8588692347208657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,float16,0,4.538511912027995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,128,0,1,fp8,fp8,0,3.6934026082356772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,fp8,0,4.458559989929199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,float16,0,4.730213483174642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,fp8,0,4.736255963643392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,128,0,1,fp8,fp8,0,3.7297439575195312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,float16,0,4.494314511617024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,fp8,0,4.823312123616536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,float16,0,2.3756213188171387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,128,0,1,fp8,fp8,0,3.7197707494099936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,fp8,0,2.4592053095499673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,128,0,1,fp8,fp8,0,2.0635733604431152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,float16,0,2.325786590576172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,fp8,0,2.3391946156819663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,128,0,1,fp8,fp8,0,2.0069333712259927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,float16,0,2.29420804977417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,128,0,1,fp8,fp8,0,2.023578643798828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,float16,0,2.2417972882588706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,fp8,0,2.2920053799947104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,128,0,1,fp8,fp8,0,2.0149332682291665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,fp8,0,2.3359039624532065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,float16,0,10.094805399576822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,fp8,0,10.943611145019531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,float16,0,10.80791982014974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,128,0,1,fp8,fp8,0,8.401903788248697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,128,0,1,fp8,fp8,0,8.478410720825195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,float16,0,10.682917277018229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,fp8,0,11.00543467203776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,fp8,0,10.85964330037435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,128,0,1,fp8,fp8,0,8.42419179280599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,fp8,0,5.485717137654622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,float16,0,6.104901631673177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,128,0,1,fp8,fp8,0,4.431045214335124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,float16,0,5.4476318359375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,128,0,1,fp8,fp8,0,4.261280059814453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,float16,0,5.461311976114909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,fp8,0,5.177055994669597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,128,0,1,fp8,fp8,0,4.25330130259196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,fp8,0,5.679759979248047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,float16,0,2.7970558802286782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,float16,0,5.658613204956055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,fp8,0,5.240207990010579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,128,0,1,fp8,fp8,0,4.295503934224446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,fp8,0,2.9054187138875327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,128,0,1,fp8,fp8,0,2.342954635620117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,float16,0,2.4984213511149087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,fp8,0,2.628063996632894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,128,0,1,fp8,fp8,0,2.2536800702412925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,float16,0,2.7368799845377603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,128,0,1,fp8,fp8,0,2.252234617869059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,float16,0,2.68340269724528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,128,0,1,fp8,fp8,0,2.259061336517334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,fp8,0,2.584143956502279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,fp8,0,2.597754637400309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,float16,0,1.3998026847839355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,fp8,0,1.4352906545003254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,float16,0,1.3665439287821453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,fp8,0,1.3791039784749348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,128,0,1,fp8,fp8,0,1.2750399907430012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,float16,0,1.3726132710774739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,fp8,0,1.3734933535257976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,128,0,1,fp8,fp8,0,1.2879359722137451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,float16,0,1.3797760009765625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,fp8,0,1.3853492736816406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,128,0,1,fp8,fp8,0,1.2517653306325276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,128,0,1,fp8,fp8,0,1.2455039819081624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,128,0,1,fp8,fp8,0,5.99514643351237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,float16,0,7.362368265787761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,fp8,0,7.625792185465495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,float16,0,7.6552480061848955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,128,0,1,fp8,fp8,0,6.013898849487305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,float16,0,7.840368270874023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,fp8,0,7.687231699625651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,128,0,1,fp8,fp8,0,6.0345815022786455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,float16,0,4.106245358784993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,fp8,0,3.9012959798177085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,128,0,1,fp8,fp8,0,3.240597407023112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,fp8,0,7.9036909739176435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,float16,0,4.021765391031901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,fp8,0,3.789093335469564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,128,0,1,fp8,fp8,0,3.086912155151367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,128,0,1,fp8,fp8,0,3.095338821411133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,fp8,0,4.107146581013997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,float16,0,3.712186813354492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,float16,0,3.789504051208496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,fp8,0,3.8874454498291016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,128,0,1,fp8,fp8,0,3.1146294275919595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,float16,0,1.8977333704630535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,fp8,0,1.9043307304382324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,128,0,1,fp8,fp8,0,1.7362133661905925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,float16,0,1.8765974044799805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,fp8,0,1.8392106691996257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,128,0,1,fp8,fp8,0,1.6592693328857422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,float16,0,1.8711946805318196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,fp8,0,1.9249067306518555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,float16,0,1.854197343190511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,fp8,0,1.9300106366475422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,128,0,1,fp8,fp8,0,1.6591359774271648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,float16,0,1.056389331817627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,fp8,0,1.0603520075480144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,128,0,1,fp8,fp8,0,0.9653600056966146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,128,0,1,fp8,fp8,0,1.6527199745178223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,float16,0,1.015664021174113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,fp8,0,1.023919979731242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,float16,0,1.0192906856536865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,fp8,0,1.0185333093007405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,128,0,1,fp8,fp8,0,0.9311253229777018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,float16,0,1.019541343053182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,fp8,0,1.022869348526001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,128,0,1,fp8,fp8,0,0.9319732983907064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,128,0,1,fp8,fp8,0,0.9282879829406738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,128,0,1,fp8,fp8,0,8.106053034464518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,float16,0,10.078853607177734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,fp8,0,10.18283716837565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,float16,0,10.063125610351562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,128,0,1,fp8,fp8,0,8.06825065612793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,fp8,0,10.018335978190104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,float16,0,10.601072311401367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,fp8,0,10.534432093302408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,float16,0,5.893269220987956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,128,0,1,fp8,fp8,0,8.121445337931315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,fp8,0,5.8784745534261065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,128,0,1,fp8,fp8,0,4.386634508768718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,float16,0,5.485888163248698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,128,0,1,fp8,fp8,0,4.065365473429362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,fp8,0,4.773109436035156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,float16,0,5.211994806925456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,128,0,1,fp8,fp8,0,4.088357289632161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,fp8,0,5.505856196085612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,float16,0,5.158762613932292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,fp8,0,5.4688161214192705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,float16,0,2.692602793375651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,fp8,0,2.5744214057922363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,128,0,1,fp8,fp8,0,4.117306709289551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,128,0,1,fp8,fp8,0,2.2460427284240723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,float16,0,2.374373277028402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,fp8,0,2.3719894091288247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,128,0,1,fp8,fp8,0,2.1161866188049316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,float16,0,2.455338637034098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,fp8,0,2.489290714263916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,128,0,1,fp8,fp8,0,2.122591972351074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,float16,0,2.4900693893432617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,float16,0,1.3231360117594402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,fp8,0,1.310480038324992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,128,0,1,fp8,fp8,0,1.2047839959462483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,float16,0,1.271946668624878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,fp8,0,1.259621302286784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,fp8,0,2.418458620707194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,128,0,1,fp8,fp8,0,2.14304526646932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,float16,0,1.258512020111084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,fp8,0,1.2837333679199219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,128,0,1,fp8,fp8,0,1.1459999879201253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,float16,0,1.2664319674173992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,fp8,0,1.3036106427510579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,128,0,1,fp8,fp8,0,1.1545440355936687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,float16,0,0.7373279730478922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,fp8,0,0.7566986878712972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,128,0,1,fp8,fp8,0,0.6860427061716715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,float16,0,0.7091946601867676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,fp8,0,0.7246452967325846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,128,0,1,fp8,fp8,0,0.656607985496521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,float16,0,0.7157226403554281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,fp8,0,0.7254133224487305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,128,0,1,fp8,fp8,0,0.6594773530960083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,128,0,1,fp8,fp8,0,1.1463039716084797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,fp8,0,0.730517307917277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,float16,0,0.7187573115030924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,128,0,1,fp8,fp8,0,0.6619840065638224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,float16,0,6.0313065846761065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,fp8,0,5.896874745686849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,128,0,1,fp8,fp8,0,4.934165318806966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,float16,0,6.062634785970052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,fp8,0,6.1095733642578125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,128,0,1,fp8,fp8,0,4.951301256815593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,float16,0,3.0697971979777017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,float16,0,6.229541142781575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,128,0,1,fp8,fp8,0,4.991584142049153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,fp8,0,6.206250508626302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,128,0,1,fp8,fp8,0,2.7080958684285483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,fp8,0,3.1167945861816406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,float16,0,3.065413475036621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,fp8,0,2.922154744466146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,128,0,1,fp8,fp8,0,2.5237226486206055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,float16,0,2.9057814280192056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,fp8,0,2.9770774841308594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,128,0,1,fp8,fp8,0,2.5281599362691245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,float16,0,1.6299893061319988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,float16,0,2.9278666178385415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,fp8,0,2.9053332010904946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,128,0,1,fp8,fp8,0,2.543706734975179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,fp8,0,1.5660746892293294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,128,0,1,fp8,fp8,0,1.414021333058675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,float16,0,1.4669653574625652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,fp8,0,1.4889012972513835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,128,0,1,fp8,fp8,0,1.3220372994740803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,float16,0,1.4799307187398274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,fp8,0,1.4873119990030925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,float16,0,1.4872159957885742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,fp8,0,1.4977332750956218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,128,0,1,fp8,fp8,0,1.3307092984517415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,float16,0,0.8539306322733561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,fp8,0,0.8587893644968668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,128,0,1,fp8,fp8,0,0.775007963180542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,float16,0,0.7981706460316976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,128,0,1,fp8,fp8,0,1.3281599680582683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,fp8,0,0.8036266962687174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,128,0,1,fp8,fp8,0,0.7284479935963949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,float16,0,0.8242879708607992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,fp8,0,0.8109920024871826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,float16,0,0.8075893719991049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,fp8,0,0.80840531984965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,128,0,1,fp8,fp8,0,0.734656016031901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,fp8,0,0.4904906749725342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,128,0,1,fp8,fp8,0,0.4512853225072225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,float16,0,0.46325866381327313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,fp8,0,0.4686719973882039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,128,0,1,fp8,fp8,0,0.4304906527201335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,float16,0,0.4705973466237386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,fp8,0,0.4666080077489217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,128,0,1,fp8,fp8,0,0.4301226536432902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,float16,0,0.4808479944864909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,float16,0,0.4696640173594157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,fp8,0,0.4764853318532308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,128,0,1,fp8,fp8,0,0.43492265542348224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,128,0,1,fp8,fp8,0,0.7295626799265543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,128,0,1,fp8,fp8,0,5.08403205871582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,float16,0,6.075162887573242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,fp8,0,6.069946924845378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,float16,0,6.0462188720703125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,128,0,1,fp8,fp8,0,5.097359975179036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,fp8,0,5.9010664621988935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,float16,0,6.264048258463542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,fp8,0,6.1366933186848955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,float16,0,3.0923945109049478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,fp8,0,3.2812960942586265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,128,0,1,fp8,fp8,0,5.147328058878581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,128,0,1,fp8,fp8,0,2.796090761820475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,float16,0,2.936842600504557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,128,0,1,fp8,fp8,0,2.547274589538574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,fp8,0,2.9772160847981772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,float16,0,2.97598934173584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,fp8,0,2.969461441040039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,128,0,1,fp8,fp8,0,2.5603307088216147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,float16,0,3.208501180013021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,float16,0,1.5689600308736165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,128,0,1,fp8,fp8,0,2.5847573280334473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,fp8,0,1.6328800519307454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,128,0,1,fp8,fp8,0,1.4479306538899739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,float16,0,1.4554932912190754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,fp8,0,1.4719573656717937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,128,0,1,fp8,fp8,0,1.31822403271993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,float16,0,1.458837350209554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,fp8,0,1.5393385887145996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,128,0,1,fp8,fp8,0,1.324938694636027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,float16,0,1.4775999387105305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,fp8,0,1.5469973882039387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,128,0,1,fp8,fp8,0,1.3378507296244304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,fp8,0,3.0574401219685874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,fp8,0,0.8702826499938965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,128,0,1,fp8,fp8,0,0.7671466668446859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,float16,0,0.7788639863332113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,fp8,0,0.7839252948760986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,128,0,1,fp8,fp8,0,0.705456018447876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,float16,0,0.7794559796651205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,fp8,0,0.7877439657847086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,128,0,1,fp8,fp8,0,0.7097439765930176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,float16,0,0.8461546897888184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,fp8,0,0.7948853174845377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,128,0,1,fp8,fp8,0,0.717024008433024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,fp8,0,0.4697226683298747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,128,0,1,fp8,fp8,0,0.4318293333053589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,float16,0,0.43437333901723224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,float16,0,0.788096030553182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,fp8,0,0.4349706570307414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,float16,0,0.46260801951090497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,float16,0,0.439578652381897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,fp8,0,0.44041065375010174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,128,0,1,fp8,fp8,0,0.40246931711832684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,float16,0,0.44576533635457355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,fp8,0,0.442629337310791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,128,0,1,fp8,fp8,0,0.4063146511713664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,float16,0,0.2805066704750061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,fp8,0,0.28411199649175006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,128,0,1,fp8,fp8,0,0.26224533716837567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,128,0,1,fp8,fp8,0,0.40200531482696533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,fp8,0,0.2640053431193034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,128,0,1,fp8,fp8,0,0.2430186669031779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,float16,0,0.2675039966901143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,128,0,1,fp8,fp8,0,0.2466826637585958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,float16,0,0.2672106623649597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,fp8,0,0.27062400182088214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,128,0,1,fp8,fp8,0,0.2504426638285319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,float16,0,0.2595786650975545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,fp8,0,0.2656266689300537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,float16,0,3.7582826614379883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,fp8,0,3.755557378133138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,128,0,1,fp8,fp8,0,3.2348321278889975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,float16,0,3.7149012883504233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,128,0,1,fp8,fp8,0,3.2532854080200195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,float16,0,3.7029813130696616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,fp8,0,3.7517919540405273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,fp8,0,3.7507947285970054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,fp8,0,2.020773410797119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,128,0,1,fp8,fp8,0,1.8221653302510579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,128,0,1,fp8,fp8,0,3.2909812927246094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,fp8,0,1.8288052876790364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,128,0,1,fp8,fp8,0,1.6327733993530273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,float16,0,1.8279253641764324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,float16,0,1.9862186113993328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,fp8,0,1.8422667185465496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,128,0,1,fp8,fp8,0,1.638005256652832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,float16,0,1.825493335723877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,fp8,0,1.8792319297790527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,128,0,1,fp8,fp8,0,1.6638399759928386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,float16,0,1.0227946440378826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,fp8,0,1.0810293356577556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,128,0,1,fp8,fp8,0,0.9504640102386475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,float16,0,0.9411626656850179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,fp8,0,0.9560320377349854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,128,0,1,fp8,fp8,0,0.8552266756693522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,float16,0,0.9468586444854736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,fp8,0,0.9618613719940186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,128,0,1,fp8,fp8,0,0.8620053132375082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,float16,0,0.9550879796346029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,fp8,0,0.9722773234049479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,float16,0,0.5497706731160482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,fp8,0,0.5640106598536173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,float16,0,0.5134720007578532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,float16,0,1.806330680847168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,fp8,0,0.5164106686909994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,128,0,1,fp8,fp8,0,0.8836853504180908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,float16,0,0.5181866486867269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,fp8,0,0.5172906716664633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,128,0,1,fp8,fp8,0,0.4680960178375244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,128,0,1,fp8,fp8,0,0.5099786520004272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,float16,0,0.5199573437372843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,fp8,0,0.5233226617177328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,128,0,1,fp8,fp8,0,0.47379199663798016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,float16,0,0.31731732686360675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,fp8,0,0.32148800293604535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,128,0,1,fp8,fp8,0,0.2951146761576335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,float16,0,0.29017066955566406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,fp8,0,0.2908693353335063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,float16,0,0.2929546634356181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,fp8,0,0.2940693298975627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,128,0,1,fp8,fp8,0,0.2716853419939677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,float16,0,0.2981119950612386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,fp8,0,0.2977919975916545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,128,0,1,fp8,fp8,0,0.27509333690007526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,float16,0,0.19724800189336142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,fp8,0,0.1994933287302653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,128,0,1,fp8,fp8,0,0.18549333016077676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,float16,0,0.1813653310139974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,fp8,0,0.18289599816004434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,128,0,1,fp8,fp8,0,0.16881599028905234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,float16,0,0.1819146672884623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,fp8,0,0.18290666739145914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,128,0,1,fp8,fp8,0,0.16884267330169678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,float16,0,0.1822506586710612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,fp8,0,0.1838080088297526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,128,0,1,fp8,fp8,0,0.17095466454823813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,128,0,1,fp8,fp8,0,0.27033599217732746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,128,0,1,fp8,fp8,0,0.8708746433258057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,float16,0,4.007669448852539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,fp8,0,4.081162770589192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,128,0,1,fp8,fp8,0,3.5663894017537436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,float16,0,4.085551897684733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,128,0,1,fp8,fp8,0,3.571626663208008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,float16,0,4.018752098083496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,fp8,0,4.060640017191569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,fp8,0,4.095205307006836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,float16,0,2.199648062388102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,128,0,1,fp8,fp8,0,3.6326773961385093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,fp8,0,2.2191999753316245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,128,0,1,fp8,fp8,0,2.0191946029663086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,float16,0,1.9606666564941406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,fp8,0,1.9656747182210286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,128,0,1,fp8,fp8,0,1.7737760543823242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,float16,0,1.9773227373758953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,fp8,0,1.9810345967610676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,float16,0,2.019024054209391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,fp8,0,2.0214079221089682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,float16,0,1.1441813309987385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,fp8,0,1.1394346555074055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,128,0,1,fp8,fp8,0,1.0377493699391682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,float16,0,1.0103092988332112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,fp8,0,1.0195199648539226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,128,0,1,fp8,fp8,0,1.813477357228597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,128,0,1,fp8,fp8,0,0.9121759732564291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,float16,0,1.0213066736857097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,fp8,0,1.0157439708709717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,128,0,1,fp8,fp8,0,0.918234666188558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,float16,0,1.0360159873962402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,fp8,0,1.0291840235392253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,128,0,1,fp8,fp8,0,0.9343039989471436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,float16,0,0.5910293261210123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,fp8,0,0.599455992380778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,128,0,1,fp8,fp8,0,0.549450675646464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,float16,0,0.5363573233286539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,fp8,0,0.5365866820017496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,128,0,1,fp8,fp8,0,0.4853760004043579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,float16,0,0.5408480167388916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,fp8,0,0.5395466486612955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,128,0,1,fp8,fp8,0,0.487770676612854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,float16,0,0.5469760100046793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,fp8,0,0.5467199881871542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,128,0,1,fp8,fp8,0,0.4946933190027873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,float16,0,0.3254506587982178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,fp8,0,0.33021867275238037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,float16,0,0.29632532596588135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,fp8,0,0.2955840031305949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,128,0,1,fp8,fp8,0,0.2712426582972209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,float16,0,0.2971893350283305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,fp8,0,0.29871465762456256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,128,0,1,fp8,fp8,0,0.2733599940935771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,float16,0,0.3028533260027568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,fp8,0,0.3023359974225362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,128,0,1,fp8,fp8,0,0.27555733919143677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,float16,0,0.19079466660817465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,fp8,0,0.1943626602490743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,128,0,1,fp8,fp8,0,0.3014773329099019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,float16,0,0.16900267203648886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,fp8,0,0.17212265729904175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,128,0,1,fp8,fp8,0,0.15661333004633585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,float16,0,0.1725119948387146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,128,0,1,fp8,fp8,0,1.7884532610575359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,128,0,1,fp8,fp8,0,0.16060800353686014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,float16,0,0.17434666554133096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,fp8,0,0.17712533473968506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,128,0,1,fp8,fp8,0,0.1646293302377065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,float16,0,0.12006400028864543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,fp8,0,0.12130133310953777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,128,0,1,fp8,fp8,0,0.11713600158691406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,float16,0,0.11156800389289856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,128,0,1,fp8,fp8,0,0.17921066284179688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,128,0,1,fp8,fp8,0,0.10616532961527507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,float16,0,0.1135093371073405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,fp8,0,0.11546132961908977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,128,0,1,fp8,fp8,0,0.10719466209411621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,float16,0,0.1128053367137909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,fp8,0,0.11357333262761433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,128,0,1,fp8,fp8,0,0.10753599802652995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,fp8,0,0.11371733744939168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,float16,0,2.628885269165039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,fp8,0,2.64030392964681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,128,0,1,fp8,fp8,0,2.3776000340779624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,float16,0,2.658245404561361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,fp8,0,2.6608053843180337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,fp8,0,0.17267733812332153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,128,0,1,fp8,fp8,0,2.3977440198262534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,float16,0,2.6902879079182944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,fp8,0,2.700261433919271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,128,0,1,fp8,fp8,0,2.4412906964619956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,fp8,0,1.5168693860371907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,float16,0,1.3282506465911865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,fp8,0,1.328879992167155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,float16,0,1.4985920588175456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,float16,0,1.3321973482767742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,128,0,1,fp8,fp8,0,1.3831146558125813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,128,0,1,fp8,fp8,0,1.209829330444336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,128,0,1,fp8,fp8,0,1.195856014887492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,fp8,0,1.3592747052510579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,fp8,0,1.337941328684489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,float16,0,0.7680373191833496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,fp8,0,0.785978635152181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,float16,0,1.3533546129862468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,float16,0,0.6882239977518717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,fp8,0,0.6910239855448405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,128,0,1,fp8,fp8,0,1.23088534673055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,float16,0,0.6989173094431559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,128,0,1,fp8,fp8,0,0.714021364847819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,128,0,1,fp8,fp8,0,0.6282133261362711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,128,0,1,fp8,fp8,0,0.6213813225428263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,float16,0,0.7025386492411295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,fp8,0,0.709882656733195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,128,0,1,fp8,fp8,0,0.6380586624145508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,float16,0,0.41118931770324707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,fp8,0,0.7048213481903076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,fp8,0,0.41815467675526935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,float16,0,0.3724000056584676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,fp8,0,0.3705759843190511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,128,0,1,fp8,fp8,0,0.33585067590077716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,float16,0,0.3710666497548421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,fp8,0,0.37961065769195557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,128,0,1,fp8,fp8,0,0.3400799830754598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,float16,0,0.3772960106531779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,fp8,0,0.38169066111246747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,128,0,1,fp8,fp8,0,0.3449600140253703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,fp8,0,0.23562665780385336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,128,0,1,fp8,fp8,0,0.21331733465194702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,float16,0,0.20469866196314493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,fp8,0,0.20444266001383463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,128,0,1,fp8,fp8,0,0.19128533204396567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,float16,0,0.20695465803146362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,fp8,0,0.2107093334197998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,128,0,1,fp8,fp8,0,0.1925493280092875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,float16,0,0.2126986583073934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,fp8,0,0.2123039960861206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,float16,0,0.13763200243314108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,float16,0,0.23068267107009888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,fp8,0,0.1397546629110972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,128,0,1,fp8,fp8,0,0.13062399625778198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,float16,0,0.12038399775822957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,fp8,0,0.12103466192881267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,128,0,1,fp8,fp8,0,0.11211199561754863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,fp8,0,0.12314666310946147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,128,0,1,fp8,fp8,0,0.11310399572054546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,float16,0,0.12712533275286356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,128,0,1,fp8,fp8,0,0.19641600052515665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,fp8,0,0.12277866403261821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,128,0,1,fp8,fp8,0,0.11561066905657451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,float16,0,0.08753599723180135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,128,0,1,fp8,fp8,0,0.08513066172599792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,float16,0,0.08547199765841167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,float16,0,0.1200266679128011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,128,0,1,fp8,fp8,0,0.08066666622956593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,float16,0,0.08586133519808452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,fp8,0,0.08669867118199666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,128,0,1,fp8,fp8,0,0.08046400050322215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,float16,0,0.0863146682580312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,fp8,0,0.0897920032342275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,fp8,0,0.08636266986529033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,128,0,1,fp8,fp8,0,0.3826933304468791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,fp8,0,0.08663466572761536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,float16,0,2.79641056060791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,fp8,0,2.790837287902832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,128,0,1,fp8,fp8,0,0.08253333469231923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,128,0,1,fp8,fp8,0,2.6846720377604165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,float16,0,2.82478396097819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,fp8,0,2.8479092915852866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,128,0,1,fp8,fp8,0,2.8692426681518555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,float16,0,3.0196959177652993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,float16,0,1.652714729309082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,fp8,0,1.6070027351379395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,fp8,0,2.994789441426595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,128,0,1,fp8,fp8,0,2.9074986775716147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,128,0,1,fp8,fp8,0,1.6100160280863445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,float16,0,1.4132374127705891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,fp8,0,1.4122239748636882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,128,0,1,fp8,fp8,0,1.3539892832438152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,float16,0,1.4277653694152832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,128,0,1,fp8,fp8,0,1.3625280062357585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,float16,0,1.4507412910461426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,fp8,0,1.4448960622151692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,float16,0,0.8367413679758707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,128,0,1,fp8,fp8,0,1.4659412701924641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,fp8,0,0.8289439678192139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,128,0,1,fp8,fp8,0,0.8177599906921387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,float16,0,0.7148746649424235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,fp8,0,0.7162133057912191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,float16,0,0.7254506746927897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,fp8,0,0.7255573272705078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,128,0,1,fp8,fp8,0,0.6953866481781006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,float16,0,0.7382986545562744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,fp8,0,0.734773317972819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,fp8,0,1.4262827237447102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,128,0,1,fp8,fp8,0,0.7062666416168213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,128,0,1,fp8,fp8,0,0.6867733001708984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,128,0,1,fp8,fp8,0,0.4230240186055501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,float16,0,0.3709546724955241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,fp8,0,0.37164799372355145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,128,0,1,fp8,fp8,0,0.34832533200581867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,float16,0,0.3771413167317708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,fp8,0,0.3780213197072347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,128,0,1,fp8,fp8,0,0.357258677482605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,float16,0,0.38152531782786053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,float16,0,0.4394986629486084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,fp8,0,0.4269546667734782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,float16,0,0.2336533268292745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,fp8,0,0.22796799739201865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,128,0,1,fp8,fp8,0,0.22472000122070312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,float16,0,0.19967466592788696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,fp8,0,0.19966399669647217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,128,0,1,fp8,fp8,0,0.18735466400782266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,128,0,1,fp8,fp8,0,0.3641226689020793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,float16,0,0.2021013299624125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,fp8,0,0.20336000124613443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,128,0,1,fp8,fp8,0,0.1918506622314453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,float16,0,0.20548800627390543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,fp8,0,0.20604799191157022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,128,0,1,fp8,fp8,0,0.19677333037058511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,float16,0,0.13235732913017273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,fp8,0,0.13039466738700867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,128,0,1,fp8,fp8,0,0.12871999541918436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,float16,0,0.11039466659228007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,fp8,0,0.11155733466148376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,128,0,1,fp8,fp8,0,0.10291733344395955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,float16,0,0.11097066601117452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,fp8,0,0.11179733276367188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,128,0,1,fp8,fp8,0,0.10575466354688008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,fp8,0,0.11361066500345866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,128,0,1,fp8,fp8,0,0.10988799730936687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,float16,0,0.07256000240643819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,fp8,0,0.07257066667079926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,128,0,1,fp8,fp8,0,0.07589333256085713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,float16,0,0.06625600159168243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,fp8,0,0.06728533407052358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,128,0,1,fp8,fp8,0,0.059978668888409935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,float16,0,0.06778133412202199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,fp8,0,0.06612800061702728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,128,0,1,fp8,fp8,0,0.06102933486302694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,float16,0,0.06650666892528534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,fp8,0,0.06685333450635274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,fp8,0,0.38281599680582684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,float16,0,0.05023466547330221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,fp8,0,0.04897599915663401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,128,0,1,fp8,fp8,0,0.04636266827583313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,float16,0,0.0470719983180364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,fp8,0,0.04771199822425842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,128,0,1,fp8,fp8,0,0.044079999128977455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,float16,0,0.048026666045188904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,fp8,0,0.04797333478927612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,128,0,1,fp8,fp8,0,0.04347200194994608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,float16,0,0.04833066463470459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,fp8,0,0.04797333478927612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,128,0,1,fp8,fp8,0,0.045696000258127846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,128,0,1,fp8,fp8,0,0.06216000020503998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,float16,0,0.11426132917404175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,128,0,1,float16,float16,0,2.3765014012654624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,128,0,1,float16,fp8,0,2.3770079612731934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,128,0,1,fp8,fp8,0,2.296250661214193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,128,0,1,float16,float16,0,2.3958239555358887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,128,0,1,float16,fp8,0,2.3988320032755532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,128,0,1,fp8,fp8,0,2.5007360776265464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,128,0,1,float16,float16,0,2.608677387237549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,128,0,1,float16,float16,0,1.4248426755269368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,128,0,1,float16,fp8,0,2.58894936243693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,128,0,1,fp8,fp8,0,2.5329972902933755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,128,0,1,float16,fp8,0,1.3909974098205566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,128,0,1,fp8,fp8,0,1.41701873143514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,128,0,1,float16,float16,0,1.202191988627116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,128,0,1,float16,fp8,0,1.1998079617818196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,128,0,1,fp8,fp8,0,1.1598026752471924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,128,0,1,float16,float16,0,1.2111413478851318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,128,0,1,fp8,fp8,0,1.168768008550008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,128,0,1,float16,float16,0,1.247045358022054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,128,0,1,float16,fp8,0,1.229418675104777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,128,0,1,fp8,fp8,0,1.271888017654419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,128,0,1,float16,fp8,0,0.7094826698303223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,128,0,1,fp8,fp8,0,0.718506654103597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,128,0,1,float16,fp8,0,1.2118079662322998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,128,0,1,float16,float16,0,0.6083733240763346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,128,0,1,float16,fp8,0,0.6074026823043823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,128,0,1,fp8,fp8,0,0.5895093282063802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,128,0,1,float16,float16,0,0.6182613372802734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,128,0,1,float16,fp8,0,0.6173760096232096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,128,0,1,fp8,fp8,0,0.5960053205490112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,128,0,1,float16,float16,0,0.7283946673075358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,128,0,1,float16,fp8,0,0.6263840198516846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,128,0,1,float16,float16,0,0.380240003267924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,128,0,1,float16,fp8,0,0.36819199721018475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,128,0,1,fp8,fp8,0,0.37134401003519696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,128,0,1,float16,float16,0,0.31437865893046063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,128,0,1,float16,fp8,0,0.31548800071080524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,128,0,1,fp8,fp8,0,0.2963306705156962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,128,0,1,float16,float16,0,0.32044800122578937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,128,0,1,float16,fp8,0,0.32089600960413617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,128,0,1,fp8,fp8,0,0.3054080009460449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,128,0,1,float16,float16,0,0.631770650545756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,128,0,1,float16,float16,0,0.32631999254226685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,128,0,1,float16,fp8,0,0.3257066607475281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,128,0,1,fp8,fp8,0,0.3155679901440938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,128,0,1,float16,fp8,0,0.19778666893641153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,128,0,1,fp8,fp8,0,0.19876267512639365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,128,0,1,float16,float16,0,0.17056532700856528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,128,0,1,float16,fp8,0,0.1699999968210856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,128,0,1,fp8,fp8,0,0.16010133425394693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,128,0,1,float16,float16,0,0.17299733559290567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,128,0,1,float16,fp8,0,0.1723733345667521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,128,0,1,fp8,fp8,0,0.1651573379834493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,128,0,1,float16,float16,0,0.175653338432312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,128,0,1,float16,fp8,0,0.17553067207336426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,128,0,1,float16,float16,0,0.20408533016840616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,128,0,1,float16,float16,0,0.11378666758537292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,128,0,1,float16,fp8,0,0.11166933178901672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,128,0,1,fp8,fp8,0,0.11266666650772095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,128,0,1,float16,float16,0,0.09329600135485332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,128,0,1,float16,fp8,0,0.09342400232950847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,128,0,1,fp8,fp8,0,0.0857973297437032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,128,0,1,float16,float16,0,0.09507733583450317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,128,0,1,float16,fp8,0,0.09504533807436626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,128,0,1,fp8,fp8,0,0.08906666437784831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,128,0,1,float16,float16,0,0.09530133008956909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,128,0,1,float16,fp8,0,0.09716799855232239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,128,0,1,fp8,fp8,0,0.0942186713218689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,128,0,1,float16,fp8,0,0.0645066648721695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,128,0,1,fp8,fp8,0,0.06639466683069865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,128,0,1,float16,float16,0,0.05728533367315928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,128,0,1,float16,fp8,0,0.05672533313433329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,128,0,1,fp8,fp8,0,0.6090026696523031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,128,0,1,float16,float16,0,0.0591893345117569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,128,0,1,float16,fp8,0,0.05845333139101664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,128,0,1,fp8,fp8,0,0.05233600238958994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,128,0,1,float16,float16,0,0.05913066864013672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,128,0,1,float16,float16,0,0.06503466765085857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,128,0,1,float16,fp8,0,0.05902933577696482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,128,0,1,fp8,fp8,0,0.053173333406448364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,128,0,1,float16,float16,0,0.04257600009441376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,128,0,1,float16,fp8,0,0.04460800190766653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,128,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,128,0,1,float16,float16,0,0.04261333247025808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,128,0,1,float16,fp8,0,0.039861333866914116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,128,0,1,fp8,fp8,0,0.038176000118255615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,128,0,1,float16,float16,0,0.041450666884581246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,128,0,1,float16,fp8,0,0.04211199780305227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,128,0,1,fp8,fp8,0,0.03818133225043615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,128,0,1,float16,float16,0,0.0439626673857371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,128,0,1,float16,fp8,0,0.0421066681543986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,128,0,1,fp8,fp8,0,0.03991466760635376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,128,0,1,float16,float16,0,0.03182933231194814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,128,0,1,float16,fp8,0,0.03181333343187968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,128,0,1,fp8,fp8,0,0.02943466603755951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,128,0,1,float16,float16,0,0.029946667452653248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,128,0,1,fp8,fp8,0,0.027615999182065327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,128,0,1,float16,float16,0,0.02961066613594691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,128,0,1,float16,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,128,0,1,fp8,fp8,0,0.05212266743183136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,128,0,1,fp8,fp8,0,0.02784000088771184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,128,0,1,float16,float16,0,0.02942399928967158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,128,0,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,128,0,1,fp8,fp8,0,0.02754666656255722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,128,0,1,float16,float16,0,1.080298662185669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,128,0,1,float16,fp8,0,1.0755733648935955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,128,0,1,fp8,fp8,0,0.168778657913208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,128,0,1,fp8,fp8,0,1.0548906326293945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,128,0,1,float16,float16,0,1.0875360171000164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,128,0,1,float16,fp8,0,1.085034688313802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,128,0,1,fp8,fp8,0,1.0659733613332112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,128,0,1,float16,fp8,0,0.030896000564098358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,128,0,1,float16,fp8,0,1.1326613426208496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,128,0,1,fp8,fp8,0,1.168768008550008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,128,0,1,float16,fp8,0,0.6434986591339111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,128,0,1,fp8,fp8,0,0.6654880046844482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,128,0,1,float16,float16,0,0.5425920089085897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,128,0,1,float16,float16,0,1.1089173158009846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,128,0,1,fp8,fp8,0,0.536629319190979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,128,0,1,float16,float16,0,0.5509653488794962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,128,0,1,float16,fp8,0,0.5505119959513346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,128,0,1,fp8,fp8,0,0.5411306619644165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,128,0,1,float16,float16,0,0.5641546646753947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,128,0,1,float16,fp8,0,0.56004265944163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,128,0,1,fp8,fp8,0,0.5524959961573283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,128,0,1,float16,float16,0,0.6620800097783407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,128,0,1,float16,float16,0,0.3428586721420288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,128,0,1,float16,fp8,0,0.3444426854451497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,128,0,1,float16,float16,0,0.2811093330383301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,128,0,1,float16,fp8,0,0.27981332937876385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,128,0,1,fp8,fp8,0,0.26687467098236084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,128,0,1,float16,float16,0,0.2842666705449422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,128,0,1,float16,fp8,0,0.2864533265431722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,128,0,1,fp8,fp8,0,0.27825067440668744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,128,0,1,float16,float16,0,0.29019733270009357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,128,0,1,float16,fp8,0,0.2893706758817037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,128,0,1,fp8,fp8,0,0.2844640016555786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,128,0,1,float16,float16,0,0.18450133005777994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,128,0,1,float16,fp8,0,0.18017599980036417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,128,0,1,fp8,fp8,0,0.3449973265329997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,128,0,1,float16,float16,0,0.1525973379611969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,128,0,1,float16,fp8,0,0.1525920033454895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,128,0,1,fp8,fp8,0,0.14418133099873862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,128,0,1,float16,float16,0,0.15584533413251242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,128,0,1,float16,fp8,0,0.15665066242218018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,128,0,1,fp8,fp8,0,0.150629331668218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,128,0,1,float16,float16,0,0.15800533692042032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,128,0,1,float16,fp8,0,0.1567306617895762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,128,0,1,fp8,fp8,0,0.15387200315793356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,128,0,1,float16,float16,0,0.10382933417956035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,128,0,1,float16,fp8,0,0.10342400272687276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,128,0,1,fp8,fp8,0,0.10579733053843181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,128,0,1,float16,float16,0,0.08667733271916707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,128,0,1,fp8,fp8,0,0.18332266807556152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,128,0,1,float16,fp8,0,0.08684800068537395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,128,0,1,fp8,fp8,0,0.07886933286984761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,128,0,1,float16,float16,0,0.08730666836102803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,128,0,1,float16,fp8,0,0.08686932921409607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,128,0,1,fp8,fp8,0,0.0832479993502299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,128,0,1,float16,float16,0,0.08943999807039897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,128,0,1,float16,fp8,0,0.08897599577903748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,128,0,1,fp8,fp8,0,0.08717333277066548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,128,0,1,float16,float16,0,0.05955199897289276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,128,0,1,float16,fp8,0,0.058037335673967995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,128,0,1,fp8,fp8,0,0.06241600215435028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,128,0,1,float16,float16,0,0.0535093347231547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,128,0,1,float16,fp8,0,0.054661333560943604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,128,0,1,fp8,fp8,0,0.04799466828505198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,128,0,1,float16,float16,0,0.054192001620928444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,128,0,1,float16,fp8,0,0.054154664278030396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,128,0,1,fp8,fp8,0,0.050213331977526345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,128,0,1,float16,float16,0,0.05590933561325073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,128,0,1,float16,fp8,0,0.05403199791908264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,128,0,1,fp8,fp8,0,0.0499946673711141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,128,0,1,float16,float16,0,0.03781333317359289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,128,0,1,float16,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,128,0,1,fp8,fp8,0,0.035786665976047516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,128,0,1,float16,float16,0,0.03791466603676478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,128,0,1,float16,fp8,0,0.54202667872111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,128,0,1,fp8,fp8,0,0.03382399926582972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,128,0,1,float16,float16,0,0.037946666280428566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,128,0,1,fp8,fp8,0,0.03456533451875051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,128,0,1,float16,float16,0,0.038634667793909706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,128,0,1,float16,fp8,0,0.03790933390458425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,128,0,1,float16,float16,0,0.02731200059254964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,128,0,1,float16,fp8,0,0.028746667007605236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,128,0,1,fp8,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,128,0,1,float16,float16,0,0.0273333340883255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,128,0,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,128,0,1,float16,fp8,0,0.037920000652472176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,128,0,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,128,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,128,0,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,128,0,1,fp8,fp8,0,0.024383999407291412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,128,0,1,float16,float16,0,0.026357332865397137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,128,0,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,128,0,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,128,0,1,float16,float16,0,0.023269332945346832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,128,0,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,128,0,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,128,0,1,float16,float16,0,0.024405332903067272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,128,0,1,float16,fp8,0,0.02457600086927414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,128,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,128,0,1,float16,float16,0,0.024608001112937927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,128,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,128,0,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,128,0,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,128,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,128,0,1,float16,fp8,0,0.03746666759252548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,128,0,1,float16,float16,0,0.5946400165557861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,128,0,1,float16,fp8,0,0.5953173240025839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,128,0,1,fp8,fp8,0,0.03615466753641764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,128,0,1,fp8,fp8,0,0.58460799853007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,128,0,1,float16,float16,0,0.6045546531677246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,128,0,1,float16,fp8,0,0.6018133163452148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,128,0,1,fp8,fp8,0,0.5902080138524374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,128,0,1,float16,float16,0,0.615829348564148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,128,0,1,float16,fp8,0,0.6268213192621866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,128,0,1,float16,float16,0,0.3726026614507039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,128,0,1,float16,fp8,0,0.35819733142852783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,128,0,1,fp8,fp8,0,0.3666293223698934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,128,0,1,float16,float16,0,0.3049439986546834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,128,0,1,float16,fp8,0,0.3041546742121379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,128,0,1,fp8,fp8,0,0.2903626759847005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,128,0,1,float16,float16,0,0.3100586732228597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,128,0,1,float16,fp8,0,0.31065066655476886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,128,0,1,fp8,fp8,0,0.30245866378148395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,128,0,1,float16,float16,0,0.31436266501744586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,128,0,1,fp8,fp8,0,0.6031200091044108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,128,0,1,float16,fp8,0,0.3135146697362264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,128,0,1,fp8,fp8,0,0.30830933650334674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,128,0,1,float16,float16,0,0.19344000021616617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,128,0,1,fp8,fp8,0,0.19362133741378784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,128,0,1,float16,fp8,0,0.16272000471750894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,128,0,1,float16,float16,0,0.025093334416548412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,128,0,1,fp8,fp8,0,0.15586666266123453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,128,0,1,float16,float16,0,0.16492266456286112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,128,0,1,float16,fp8,0,0.17012800772984824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,128,0,1,fp8,fp8,0,0.16101866960525513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,128,0,1,float16,float16,0,0.16663466890652975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,128,0,1,float16,fp8,0,0.16666133205095926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,128,0,1,fp8,fp8,0,0.164383997519811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,128,0,1,float16,float16,0,0.10455999771753947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,128,0,1,float16,fp8,0,0.10386666655540466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,128,0,1,fp8,fp8,0,0.1092693308989207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,128,0,1,float16,float16,0,0.0890880028406779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,128,0,1,float16,fp8,0,0.09119466940561931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,128,0,1,fp8,fp8,0,0.08294400076071422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,128,0,1,float16,float16,0,0.0892639954884847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,128,0,1,float16,fp8,0,0.09150399764378865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,128,0,1,fp8,fp8,0,0.08685866991678874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,128,0,1,float16,float16,0,0.090938667456309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,128,0,1,float16,fp8,0,0.0918933351834615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,128,0,1,fp8,fp8,0,0.09090666969617207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,128,0,1,float16,float16,0,0.06031466523806254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,128,0,1,float16,fp8,0,0.059978668888409935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,128,0,1,fp8,fp8,0,0.062234664956728615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,128,0,1,float16,float16,0,0.052746668457984924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,128,0,1,float16,fp8,0,0.05286400020122528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,128,0,1,fp8,fp8,0,0.04957866668701172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,128,0,1,float16,float16,0,0.05415999889373779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,128,0,1,float16,fp8,0,0.05415999889373779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,128,0,1,fp8,fp8,0,0.05014933149019877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,128,0,1,float16,float16,0,0.054378668467203774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,128,0,1,float16,fp8,0,0.18716265757878622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,128,0,1,float16,fp8,0,0.05410666763782501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,128,0,1,fp8,fp8,0,0.04979733129342397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,128,0,1,float16,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,128,0,1,fp8,fp8,0,0.03578133384386698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,128,0,1,float16,float16,0,0.03575466573238373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,128,0,1,float16,fp8,0,0.03579733272393545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,128,0,1,float16,float16,0,0.037077332536379494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,128,0,1,float16,fp8,0,0.0371573343873024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,128,0,1,fp8,fp8,0,0.033701332906881966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,128,0,1,float16,float16,0,0.03606933355331421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,128,0,1,float16,fp8,0,0.03777066618204117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,128,0,1,fp8,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,128,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,128,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,128,0,1,fp8,fp8,0,0.03374933451414108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,128,0,1,float16,float16,0,0.02548266698916753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,128,0,1,float16,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,128,0,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,128,0,1,float16,float16,0,0.02606400102376938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,128,0,1,float16,fp8,0,0.02629333237806956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,128,0,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,128,0,1,float16,float16,0,0.025888000925381977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,128,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,128,0,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,128,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,128,0,1,float16,fp8,0,0.021695998807748158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,128,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,128,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,128,0,1,float16,float16,0,0.03559466699759165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,128,0,1,fp8,fp8,0,0.020101333657900494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,128,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,128,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,128,0,1,fp8,fp8,0,0.026949333647886913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,128,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,128,0,1,float16,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,128,0,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,128,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,128,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,128,0,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,128,0,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,128,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,128,0,1,float16,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,128,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,128,0,1,float16,float16,0,0.16057067116101584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,128,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,128,0,1,float16,float16,0,0.3839999834696452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,128,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,128,0,1,fp8,fp8,0,0.3749013344446818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,128,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,128,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,128,0,1,float16,fp8,0,0.389792005221049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,128,0,1,fp8,fp8,0,0.38399465878804523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,128,0,1,float16,float16,0,0.40801600615183514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,128,0,1,float16,fp8,0,0.3844746748606364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,128,0,1,fp8,fp8,0,0.38974400361378986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,128,0,1,float16,fp8,0,0.22545599937438965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,128,0,1,float16,float16,0,0.3890933195749919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,128,0,1,fp8,fp8,0,0.23354132970174155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,128,0,1,float16,float16,0,0.20132267475128174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,128,0,1,float16,fp8,0,0.20042133331298828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,128,0,1,fp8,fp8,0,0.1970613400141398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,128,0,1,float16,float16,0,0.20390399297078451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,128,0,1,float16,fp8,0,0.3944853146870931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,128,0,1,fp8,fp8,0,0.2018773357073466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,128,0,1,float16,float16,0,0.20589866240819296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,128,0,1,float16,fp8,0,0.21109867095947266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,128,0,1,fp8,fp8,0,0.20407466093699136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,128,0,1,float16,float16,0,0.12370133399963379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,128,0,1,float16,fp8,0,0.12096533179283142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,128,0,1,fp8,fp8,0,0.1283519963423411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,128,0,1,float16,float16,0,0.1074773371219635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,128,0,1,float16,fp8,0,0.1090826690196991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,128,0,1,fp8,fp8,0,0.10159466663996379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,128,0,1,float16,float16,0,0.11075199643770854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,128,0,1,float16,fp8,0,0.10839466253916423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,128,0,1,fp8,fp8,0,0.10566400488217671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,128,0,1,float16,float16,0,0.10987200339635213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,128,0,1,float16,fp8,0,0.10930666327476501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,128,0,1,fp8,fp8,0,0.10962667067845662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,128,0,1,float16,float16,0,0.06850133339564006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,128,0,1,float16,fp8,0,0.06836266815662384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,128,0,1,fp8,fp8,0,0.07321600119272868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,128,0,1,float16,float16,0,0.06204266846179962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,128,0,1,float16,fp8,0,0.06061866879463196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,128,0,1,fp8,fp8,0,0.058650667468706764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,128,0,1,float16,float16,0,0.06238933404286703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,128,0,1,float16,fp8,0,0.06217066446940104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,128,0,1,fp8,fp8,0,0.060080001751581825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,128,0,1,float16,float16,0,0.23144533236821493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,128,0,1,float16,float16,0,0.062074666221936546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,128,0,1,float16,fp8,0,0.06241066753864288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,128,0,1,fp8,fp8,0,0.05824000140031179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,128,0,1,float16,float16,0,0.03982933362325033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,128,0,1,float16,fp8,0,0.04185600082079569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,128,0,1,fp8,fp8,0,0.041749333341916404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,128,0,1,float16,float16,0,0.03941333293914795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,128,0,1,fp8,fp8,0,0.03814399987459183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,128,0,1,float16,float16,0,0.03979733337958654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,128,0,1,float16,fp8,0,0.03997866561015447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,128,0,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,128,0,1,float16,float16,0,0.039919999738534294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,128,0,1,float16,fp8,0,0.04001066585381826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,128,0,1,fp8,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,128,0,1,float16,float16,0,0.029530666768550873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,128,0,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,128,0,1,float16,fp8,0,0.029605334003766377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,128,0,1,float16,fp8,0,0.027855999767780304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,128,0,1,float16,fp8,0,0.20415999492009482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,128,0,1,float16,float16,0,0.029167999823888142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,128,0,1,float16,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,128,0,1,fp8,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,128,0,1,float16,float16,0,0.027989332874615986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,128,0,1,float16,fp8,0,0.029071999092896778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,128,0,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,128,0,1,float16,float16,0,0.023818666736284893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,128,0,1,float16,fp8,0,0.023717333873112995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,128,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,128,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,128,0,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,128,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,128,0,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,128,0,1,float16,fp8,0,0.02369600037733714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,128,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,128,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,128,0,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,128,0,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,128,0,1,float16,float16,0,0.018709332992633183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,128,0,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,128,0,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,128,0,1,float16,float16,0,0.018373332917690277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,128,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,128,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,128,0,1,fp8,fp8,0,0.018133333573738735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,128,0,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,128,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,128,0,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,128,0,1,float16,float16,0,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,128,0,1,fp8,fp8,0,0.02829866607983907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,128,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,128,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,128,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,128,0,1,float16,float16,0,0.2808000048001607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,128,0,1,float16,fp8,0,0.2794453303019206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,128,0,1,fp8,fp8,0,0.2796480059623718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,128,0,1,float16,float16,0,0.283680001894633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,128,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,128,0,1,float16,fp8,0,0.28390934069951373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,128,0,1,fp8,fp8,0,0.2841813365618388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,128,0,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,128,0,1,float16,fp8,0,0.28521066904067993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,128,0,1,fp8,fp8,0,0.2885439991950989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,128,0,1,float16,float16,0,0.16317866245905557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,128,0,1,float16,fp8,0,0.16236266493797302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,128,0,1,fp8,fp8,0,0.17004267374674478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,128,0,1,float16,float16,0,0.14851199587186178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,128,0,1,float16,fp8,0,0.14999467134475708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,128,0,1,fp8,fp8,0,0.14430933197339377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,128,0,1,float16,float16,0,0.14917332927385965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,128,0,1,float16,fp8,0,0.14898133277893066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,128,0,1,fp8,fp8,0,0.14844800035158792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,128,0,1,float16,float16,0,0.14864533146222433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,128,0,1,float16,fp8,0,0.15341333548227945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,128,0,1,fp8,fp8,0,0.15080533425013223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,128,0,1,float16,fp8,0,0.08824533224105835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,128,0,1,fp8,fp8,0,0.09278933207194011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,128,0,1,float16,float16,0,0.2863360047340393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,128,0,1,float16,fp8,0,0.08038933575153351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,128,0,1,fp8,fp8,0,0.07874133189519246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,128,0,1,float16,float16,0,0.08049599826335907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,128,0,1,float16,fp8,0,0.08062399923801422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,128,0,1,fp8,fp8,0,0.0788213312625885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,128,0,1,float16,float16,0,0.08118933439254761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,128,0,1,float16,fp8,0,0.08216000099976857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,128,0,1,fp8,fp8,0,0.07858666777610779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,128,0,1,float16,float16,0,0.05074666440486908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,128,0,1,float16,fp8,0,0.050741334756215416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,128,0,1,fp8,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,128,0,1,float16,float16,0,0.08268799881140391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,128,0,1,float16,fp8,0,0.04996799925963084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,128,0,1,fp8,fp8,0,0.04827733337879181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,128,0,1,float16,float16,0,0.04866133133570353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,128,0,1,float16,fp8,0,0.048623998959859215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,128,0,1,fp8,fp8,0,0.04794666667779287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,128,0,1,float16,float16,0,0.05036266644795736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,128,0,1,float16,fp8,0,0.05042133231957754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,128,0,1,float16,float16,0,0.04985600213209788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,128,0,1,float16,float16,0,0.033333333830038704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,128,0,1,float16,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,128,0,1,fp8,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,128,0,1,float16,float16,0,0.03332266708215078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,128,0,1,float16,fp8,0,0.032586666444937386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,128,0,1,fp8,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,128,0,1,float16,float16,0,0.032501332461833954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,128,0,1,float16,float16,0,0.08574400345484416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,128,0,1,float16,float16,0,0.033759998778502144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,128,0,1,fp8,fp8,0,0.05004266897837321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,128,0,1,fp8,fp8,0,0.03369066615899404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,128,0,1,float16,float16,0,0.02569066733121872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,128,0,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,128,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,128,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,128,0,1,float16,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,128,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,128,0,1,float16,float16,0,0.024501333634058636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,128,0,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,128,0,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,128,0,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,128,0,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,128,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,128,0,1,float16,float16,0,0.020954666038354237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,128,0,1,float16,fp8,0,0.019930666933457058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,128,0,1,fp8,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,128,0,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,128,0,1,float16,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,128,0,1,fp8,fp8,0,0.020853333175182343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,128,0,1,float16,float16,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,128,0,1,float16,fp8,0,0.019839999576409657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,128,0,1,fp8,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,128,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,128,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,128,0,1,float16,fp8,0,0.033002667129039764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,128,0,1,fp8,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,128,0,1,float16,fp8,0,0.017765333255132038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,128,0,1,float16,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,128,0,1,fp8,fp8,0,0.01655999943614006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,128,0,1,float16,float16,0,0.016197333733240765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,128,0,1,float16,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,128,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,128,0,1,fp8,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,128,0,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,128,0,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,128,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,128,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,128,0,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,128,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,128,0,1,fp8,fp8,0,0.017840000490347546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,128,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,128,0,1,float16,float16,0,0.23306665817896524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,128,0,1,float16,fp8,0,0.2334559957186381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,128,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,128,0,1,float16,float16,0,0.23434134324391684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,128,0,1,float16,fp8,0,0.23467733462651572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,128,0,1,fp8,fp8,0,0.23753066857655844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,128,0,1,float16,float16,0,0.2363306681315104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,128,0,1,float16,fp8,0,0.23444799582163492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,128,0,1,fp8,fp8,0,0.23913600047429404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,128,0,1,float16,float16,0,0.12974933783213297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,128,0,1,float16,fp8,0,0.12962133685747781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,128,0,1,fp8,fp8,0,0.1376053293546041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,128,0,1,fp8,fp8,0,0.2344640096028646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,128,0,1,float16,fp8,0,0.12403733531634013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,128,0,1,float16,float16,0,0.12386666735013326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,128,0,1,float16,fp8,0,0.12410666545232137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,128,0,1,fp8,fp8,0,0.12382400035858154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,128,0,1,float16,float16,0,0.12483732899030049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,128,0,1,float16,fp8,0,0.12404800454775493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,128,0,1,fp8,fp8,0,0.12475732962290446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,128,0,1,float16,fp8,0,0.07249066730340321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,128,0,1,fp8,fp8,0,0.07239999870459239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,128,0,1,float16,float16,0,0.12405332922935486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,128,0,1,float16,float16,0,0.07047466437021892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,128,0,1,float16,fp8,0,0.07041599849859874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,128,0,1,fp8,fp8,0,0.06844800213972728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,128,0,1,float16,float16,0,0.06969599922498067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,128,0,1,float16,fp8,0,0.06897599995136261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,128,0,1,fp8,fp8,0,0.06871999800205231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,128,0,1,float16,float16,0,0.07051200171311696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,128,0,1,float16,fp8,0,0.07064533233642578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,128,0,1,fp8,fp8,0,0.07050666709740956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,128,0,1,float16,float16,0,0.043765331308046974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,128,0,1,fp8,fp8,0,0.12389333049456279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,128,0,1,fp8,fp8,0,0.044026667873064675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,128,0,1,float16,float16,0,0.04205866654713949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,128,0,1,float16,fp8,0,0.04201599955558777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,128,0,1,fp8,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,128,0,1,float16,float16,0,0.07057066758473714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,128,0,1,float16,float16,0,0.04427733520666758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,128,0,1,float16,fp8,0,0.04307200014591217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,128,0,1,fp8,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,128,0,1,float16,float16,0,0.041802664597829185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,128,0,1,float16,fp8,0,0.04400533437728882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,128,0,1,fp8,fp8,0,0.0436106671889623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,128,0,1,float16,float16,0,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,128,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,128,0,1,fp8,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,128,0,1,float16,float16,0,0.029338667790095013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,128,0,1,float16,fp8,0,0.029552000264326733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,128,0,1,fp8,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,128,0,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,128,0,1,fp8,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,128,0,1,float16,float16,0,0.029781334102153778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,128,0,1,float16,fp8,0,0.02943466603755951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,128,0,1,fp8,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,128,0,1,float16,float16,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,128,0,1,float16,fp8,0,0.02254933367172877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,128,0,1,fp8,fp8,0,0.023621333142121632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,128,0,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,128,0,1,float16,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,128,0,1,fp8,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,128,0,1,float16,float16,0,0.021541332205136616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,128,0,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,128,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,128,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,128,0,1,float16,fp8,0,0.021717332303524017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,128,0,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,128,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,128,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,128,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,128,0,1,float16,float16,0,0.030426666140556335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,128,0,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,128,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,128,0,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,128,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,128,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,128,0,1,float16,float16,0,0.018768000106016796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,128,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,128,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,128,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,128,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,128,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,128,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,128,0,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,128,0,1,float16,fp8,0,0.0161920003592968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,128,0,1,fp8,fp8,0,0.016309333344300587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,128,0,1,float16,float16,0,0.016602666427691776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,128,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,128,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,128,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,128,0,1,float16,fp8,0,0.044026667873064675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,128,0,1,float16,fp8,0,0.19944000244140625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,128,0,1,fp8,fp8,0,0.19448000192642212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,128,0,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,128,0,1,float16,fp8,0,0.19991467396418253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,128,0,1,fp8,fp8,0,0.19378666083017984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,128,0,1,float16,float16,0,0.19975467522939047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,128,0,1,float16,fp8,0,0.19969600439071655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,8,128,0,1,fp8,fp8,0,0.19525333245595297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,0,0.10558399558067322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,128,0,1,fp8,fp8,0,0.10328533252080281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,4,128,0,1,float16,float16,0,0.19961067040761313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,128,0,1,float16,float16,0,0.10672533512115479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,128,0,1,float16,fp8,0,0.10745599865913391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,2,128,0,1,fp8,fp8,0,0.10345066587130229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,128,0,1,float16,float16,0,0.10735999544461568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,128,0,1,float16,fp8,0,0.10563199718793233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,4,128,0,1,fp8,fp8,0,0.1037546694278717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,128,0,1,float16,float16,0,0.1056160032749176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,0,0.1071626643339793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,128,0,1,fp8,fp8,0,0.10310399532318115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,0,0.06005333364009857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,0,0.06041066845258077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,40,128,0,1,fp8,fp8,0,0.06021333237489065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,128,0,1,float16,float16,0,0.060271998246510826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,128,0,1,float16,fp8,0,0.06066666543483734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,2,128,0,1,fp8,fp8,0,0.060133333007494606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,128,0,1,float16,float16,0,0.06029866635799408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,128,0,1,float16,fp8,0,0.06105599800745646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,40,8,128,0,1,float16,fp8,0,0.10752000411351521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,4,128,0,1,fp8,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,40,2,128,0,1,float16,float16,0,0.1983519991238912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,128,0,1,float16,fp8,0,0.06018133461475372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,128,0,1,fp8,fp8,0,0.05831466615200043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,0,0.037845333417256675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,0,0.037845333417256675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,40,128,0,1,fp8,fp8,0,0.037871999045213066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,128,0,1,float16,float16,0,0.037962667644023895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,128,0,1,float16,fp8,0,0.038015998899936676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,2,128,0,1,fp8,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,128,0,1,float16,float16,0,0.03775999943415324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,128,0,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,4,128,0,1,fp8,fp8,0,0.037978666524092354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,128,0,1,float16,float16,0,0.037477334340413414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,128,0,1,float16,fp8,0,0.0397173340121905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,40,8,128,0,1,fp8,fp8,0,0.03738133360942205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,0,0.02717866748571396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,40,8,128,0,1,float16,float16,0,0.06010666489601135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,128,0,1,float16,float16,0,0.027162666122118633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,128,0,1,float16,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,2,128,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,128,0,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,128,0,1,fp8,fp8,0,0.02700799951950709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,128,0,1,float16,float16,0,0.027098665634791057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,128,0,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,8,128,0,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,0,0.020970667401949566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,0,0.023584000766277313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,40,128,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,128,0,1,float16,float16,0,0.023578666150569916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,128,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,2,128,0,1,fp8,fp8,0,0.022128000855445862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,128,0,1,float16,float16,0,0.02314666658639908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,128,0,1,float16,fp8,0,0.021546666820844013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,4,128,0,1,fp8,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,128,0,1,float16,float16,0,0.021568000316619873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,128,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,40,8,128,0,1,fp8,fp8,0,0.021744000415007275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,40,128,0,1,fp8,fp8,0,0.017978666971127193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,128,0,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,2,128,0,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,128,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,128,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,4,128,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,128,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,128,0,1,float16,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,40,8,128,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,40,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,2,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,40,128,0,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,128,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,128,0,1,float16,float16,0,0.01594666639963786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,8,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,0,0.015882667154073715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,40,128,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,2,128,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,4,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,128,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,40,8,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,40,4,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,40,4,128,0,1,float16,float16,0,0.02811199923356374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,0,1,fp8,fp8,0,11.09786605834961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,float16,0,14.668949127197266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,fp8,0,14.855018615722656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,float16,0,14.161599477132162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,0,1,fp8,fp8,0,11.562234242757162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,fp8,0,15.052767435709635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,float16,0,16.070741017659504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,fp8,0,15.76254399617513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,0,1,fp8,fp8,0,11.449515024820963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,float16,0,14.770933787027994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,0,1,fp8,fp8,0,11.311419169108072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,fp8,0,15.135546366373697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,fp8,0,7.665749231974284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,0,1,fp8,fp8,0,5.885749181111653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,float16,0,7.852405548095703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,float16,0,7.7291412353515625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,0,1,fp8,fp8,0,5.758277257283528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,fp8,0,7.166746775309245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,float16,0,7.662272135416667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,0,1,fp8,fp8,0,5.665738423665364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,fp8,0,7.353386561075847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,float16,0,7.390832265218099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,0,1,fp8,fp8,0,5.678810755411784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,fp8,0,7.638213475545247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,float16,0,7.544335683186849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,fp8,0,7.657552083333333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,float16,0,3.9164158503214517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,fp8,0,4.047952016194661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,0,1,fp8,fp8,0,3.066464106241862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,float16,0,3.5871572494506836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,0,1,fp8,fp8,0,5.707712173461914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,fp8,0,3.574634552001953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,0,1,fp8,fp8,0,2.9786720275878906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,float16,0,3.7516212463378906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,fp8,0,3.5411465962727866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,float16,0,3.774597485860189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,fp8,0,3.6680479049682617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,0,1,fp8,fp8,0,2.999482790629069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,float16,0,3.5894346237182617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,0,1,fp8,fp8,0,2.9925225575764975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,0,1,fp8,fp8,0,3.001333236694336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,fp8,0,3.772320111592611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,float16,0,1.82097593943278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,fp8,0,1.8592212994893391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,float16,0,1.8970880508422852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,fp8,0,1.8312586148579915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,0,1,fp8,fp8,0,1.6239412625630696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,float16,0,1.8742507298787434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,fp8,0,1.8486720720926921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,0,1,fp8,fp8,0,1.6737653414408367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,0,1,fp8,fp8,0,1.6291413307189941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,float16,0,1.8499093055725098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,fp8,0,1.7947893142700195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,0,1,fp8,fp8,0,1.6309226353963215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,float16,0,1.8045387268066406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,0,1,fp8,fp8,0,1.6330240567525227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,fp8,0,1.9501439730326335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,float16,0,8.529258728027344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,0,1,fp8,fp8,0,6.642693201700847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,fp8,0,8.09228261311849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,float16,0,8.479594548543295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,0,1,fp8,fp8,0,6.777077356974284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,fp8,0,8.479514439900717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,float16,0,8.467109044392904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,fp8,0,8.54800542195638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,0,1,fp8,fp8,0,6.704762776692708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,float16,0,9.321791966756185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,float16,0,4.860053380330403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,fp8,0,8.322373072306315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,0,1,fp8,fp8,0,7.044037501017253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,fp8,0,4.840421358744304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,0,1,fp8,fp8,0,3.581077257792155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,float16,0,3.9153760274251304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,fp8,0,4.406559944152832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,0,1,fp8,fp8,0,3.397621472676595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,float16,0,4.436496098836263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,fp8,0,4.548501332600911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,0,1,fp8,fp8,0,3.4062506357828775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,float16,0,4.315605481465657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,fp8,0,4.501536051432292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,0,1,fp8,fp8,0,3.4136425654093423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,float16,0,4.3028214772542315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,fp8,0,4.454314549763997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,float16,0,2.1154613494873047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,0,1,fp8,fp8,0,3.4525171915690103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,fp8,0,2.2619733810424805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,0,1,fp8,fp8,0,1.885642687479655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,float16,0,2.0441226959228516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,fp8,0,2.0475145975748696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,0,1,fp8,fp8,0,1.8139840761820476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,float16,0,2.0303573608398438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,fp8,0,2.0421279271443686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,float16,0,2.0657386779785156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,fp8,0,2.0612853368123374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,0,1,fp8,fp8,0,1.8263306617736816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,float16,0,2.1035680770874023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,0,1,fp8,fp8,0,1.8098079363505046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,fp8,0,2.025872071584066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,0,1,fp8,fp8,0,1.8238132794698079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,float16,0,1.2577119668324788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,fp8,0,1.1336747010548909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,0,1,fp8,fp8,0,1.0457599957784016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,float16,0,1.1014293034871419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,fp8,0,1.1179786523183186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,float16,0,1.1040373643239338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,fp8,0,1.1091466744740803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,0,1,fp8,fp8,0,1.0131466388702393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,float16,0,1.1087626616160076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,fp8,0,1.1091307004292805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,0,1,fp8,fp8,0,1.0157653490702312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,float16,0,1.1250773270924885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,0,1,fp8,fp8,0,1.0137333075205486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,fp8,0,1.1193066438039143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,0,1,fp8,fp8,0,1.0210239887237549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,0,1,fp8,fp8,0,4.786389350891113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,fp8,0,6.101653416951497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,float16,0,6.232063929239909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,float16,0,5.98684819539388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,0,1,fp8,fp8,0,4.811429341634114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,float16,0,6.143472035725911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,fp8,0,6.1156050364176435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,fp8,0,6.343050638834636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,0,1,fp8,fp8,0,4.8144105275472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,float16,0,5.990800221761067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,float16,0,3.0893173217773438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,fp8,0,6.173088073730469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,0,1,fp8,fp8,0,4.851946512858073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,0,1,fp8,fp8,0,2.60861873626709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,float16,0,2.8583625157674155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,fp8,0,3.0353867212931314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,0,1,fp8,fp8,0,2.4767252604166665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,fp8,0,3.0787731806437173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,fp8,0,3.011018753051758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,0,1,fp8,fp8,0,2.4841440518697104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,float16,0,2.8832321166992188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,float16,0,2.7930240631103516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,0,1,fp8,fp8,0,2.494767983754476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,fp8,0,2.9478187561035156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,float16,0,3.0289440155029297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,fp8,0,3.057445208231608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,float16,0,1.7737226486206055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,0,1,fp8,fp8,0,2.5106560389200845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,fp8,0,1.5481546719868977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,0,1,fp8,fp8,0,1.3881600697835286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,float16,0,1.5586934089660645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,fp8,0,1.4574507077534993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,0,1,fp8,fp8,0,1.3315093517303467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,float16,0,1.4663999875386555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,fp8,0,1.5152692794799805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,float16,0,1.4743946393330891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,fp8,0,1.5994826952616374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,0,1,fp8,fp8,0,1.3368266423543294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,float16,0,1.4732054074605305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,fp8,0,1.48471466700236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,0,1,fp8,fp8,0,1.343781312306722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,float16,0,0.8593119780222574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,fp8,0,0.8554720083872477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,0,1,fp8,fp8,0,0.7869546413421631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,float16,0,0.8494826952616373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,fp8,0,0.8257599671681722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,0,1,fp8,fp8,0,0.7570559978485107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,float16,0,0.8204960028330485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,fp8,0,0.859765370686849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,0,1,fp8,fp8,0,1.334810733795166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,float16,0,0.8215680122375488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,fp8,0,0.8350506623586019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,float16,0,0.8254506587982178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,fp8,0,0.8276159763336182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,0,1,fp8,fp8,0,0.76582932472229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,0,1,fp8,fp8,0,0.7587680021921793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,0,1,fp8,fp8,0,0.7608959674835205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,0,1,fp8,fp8,0,6.406554539998372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,float16,0,8.088373184204102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,fp8,0,7.932975769042969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,float16,0,7.840576171875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,0,1,fp8,fp8,0,6.438896179199219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,fp8,0,8.32426643371582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,float16,0,8.145882924397787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,fp8,0,7.808357238769531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,0,1,fp8,fp8,0,6.574047724405925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,float16,0,8.554831822713217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,0,1,fp8,fp8,0,6.591290791829427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,fp8,0,8.931376139322916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,float16,0,4.158138593037923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,0,1,fp8,fp8,0,3.4675572713216147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,fp8,0,4.314101219177246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,float16,0,4.0623518625895185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,fp8,0,3.8416694005330405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,0,1,fp8,fp8,0,3.2546399434407554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,float16,0,4.114512125651042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,fp8,0,4.047898610432942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,float16,0,3.8962294260660806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,0,1,fp8,fp8,0,3.2772693634033203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,0,1,fp8,fp8,0,3.2749226888020835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,fp8,0,4.147557258605957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,float16,0,2.0594080289204917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,0,1,fp8,fp8,0,3.303663889567057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,fp8,0,2.096890608469645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,0,1,fp8,fp8,0,1.8042933146158855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,float16,0,3.9231414794921875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,fp8,0,3.862410545349121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,float16,0,2.0555200576782227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,fp8,0,1.897264003753662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,0,1,fp8,fp8,0,1.7035840352376301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,float16,0,1.8814560572306316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,0,1,fp8,fp8,0,1.7023785909016926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,float16,0,1.9062827428181965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,fp8,0,1.9738240242004395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,0,1,fp8,fp8,0,1.7095732688903809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,float16,0,1.9131253560384114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,fp8,0,1.9052426020304363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,fp8,0,1.9016745885213215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,0,1,fp8,fp8,0,1.7218292554219563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,float16,0,1.050154685974121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,fp8,0,1.0736479759216309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,0,1,fp8,fp8,0,0.9725759824117025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,float16,0,1.0093333721160889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,fp8,0,1.006383975346883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,0,1,fp8,fp8,0,0.9246880213419596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,float16,0,1.0141493479410808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,fp8,0,1.0100746949513753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,0,1,fp8,fp8,0,0.9267679850260416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,float16,0,1.0093013445536296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,fp8,0,1.007749319076538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,0,1,fp8,fp8,0,0.9289759794871012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,float16,0,1.0173973242441814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,fp8,0,1.0165066719055176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,0,1,fp8,fp8,0,0.9361600081125895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,fp8,0,0.6035786469777426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,0,1,fp8,fp8,0,0.5625173250834147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,float16,0,0.5746293465296427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,fp8,0,0.5786133209864298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,0,1,fp8,fp8,0,0.5370719830195109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,float16,0,0.5895413160324097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,fp8,0,0.5807146628697714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,0,1,fp8,fp8,0,0.5379733244578043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,float16,0,0.6016853253046671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,float16,0,0.5928213198979696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,0,1,fp8,fp8,0,0.5418506860733032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,float16,0,0.5892053445180258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,fp8,0,0.5953760147094727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,0,1,fp8,fp8,0,0.5434826612472534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,fp8,0,0.5808159907658895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,0,1,fp8,fp8,0,3.942938804626465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,fp8,0,4.783674558003743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,float16,0,4.885957400004069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,float16,0,4.72486941019694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,fp8,0,4.547301292419434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,float16,0,4.666032155354817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,0,1,fp8,fp8,0,3.948650677998861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,0,1,fp8,fp8,0,3.956933339436849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,fp8,0,4.804330507914226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,float16,0,5.055770556131999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,fp8,0,4.779770533243815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,0,1,fp8,fp8,0,4.004495938618978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,float16,0,2.4584852854410806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,fp8,0,2.5032960573832193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,fp8,0,2.429925282796224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,0,1,fp8,fp8,0,2.0080854098002114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,0,1,fp8,fp8,0,2.1711039543151855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,float16,0,2.3188106218973794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,fp8,0,2.331205368041992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,0,1,fp8,fp8,0,2.010159969329834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,float16,0,2.230303923288981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,float16,0,2.259552001953125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,fp8,0,2.422757307688395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,0,1,fp8,fp8,0,2.0226240158081055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,float16,0,2.330613295237223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,fp8,0,2.314746697743734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,0,1,fp8,fp8,0,2.047877311706543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,fp8,0,1.235759973526001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,0,1,fp8,fp8,0,1.140613317489624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,float16,0,1.1662826538085938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,fp8,0,1.1639306545257568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,0,1,fp8,fp8,0,1.0629973411560059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,float16,0,1.1685386498769124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,float16,0,1.250986655553182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,fp8,0,1.16867200533549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,float16,0,1.2097760041554768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,fp8,0,1.1806346575419109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,0,1,fp8,fp8,0,1.085647980372111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,float16,0,1.1849546432495117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,fp8,0,1.1933653354644775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,0,1,fp8,fp8,0,1.080847978591919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,float16,0,0.6765920321146647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,fp8,0,0.7226453622182211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,0,1,fp8,fp8,0,0.629098653793335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,float16,0,0.6410826841990153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,fp8,0,0.6464746793111166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,0,1,fp8,fp8,0,0.5907893180847168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,float16,0,0.6414026816685995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,fp8,0,0.647002657254537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,0,1,fp8,fp8,0,0.5925279855728149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,float16,0,0.6482986609141032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,fp8,0,0.6509279807408651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,0,1,fp8,fp8,0,0.5935573180516561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,0,1,fp8,fp8,0,1.0649867057800293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,float16,0,0.6505813201268514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,fp8,0,0.6556373437245687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,float16,0,0.39636798699696857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,fp8,0,0.40246399243672687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,0,1,fp8,fp8,0,0.3717759847640991
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,float16,0,0.3760960102081299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,fp8,0,0.37642133235931396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,0,1,fp8,fp8,0,0.35129066308339435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,float16,0,0.3777120113372803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,fp8,0,0.37568533420562744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,0,1,fp8,fp8,0,0.3535573482513428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,float16,0,0.38022398948669434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,fp8,0,0.3805919885635376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,0,1,fp8,fp8,0,0.3565760056177775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,float16,0,0.38202667236328125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,fp8,0,0.385535995165507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,0,1,fp8,fp8,0,0.3574719826380412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,0,1,fp8,fp8,0,0.5989973147710165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,float16,0,4.676810582478841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,fp8,0,4.735082626342773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,0,1,fp8,fp8,0,4.027119954427083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,float16,0,4.601178805033366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,fp8,0,4.661871910095215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,0,1,fp8,fp8,0,4.05021317799886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,float16,0,4.516405423482259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,fp8,0,4.790901184082031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,0,1,fp8,fp8,0,4.073989232381185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,float16,0,4.826565424601237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,float16,0,2.490597407023112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,fp8,0,4.770949363708496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,fp8,0,2.591071923573812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,0,1,fp8,fp8,0,2.237679958343506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,float16,0,2.2360266049702964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,0,1,fp8,fp8,0,4.124282519022624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,fp8,0,2.314271926879883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,0,1,fp8,fp8,0,2.0286240577697754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,float16,0,2.306277275085449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,fp8,0,2.246394634246826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,0,1,fp8,fp8,0,2.035136063893636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,float16,0,2.2577013969421387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,fp8,0,2.312506675720215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,0,1,fp8,fp8,0,2.0594080289204917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,float16,0,2.287072022755941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,fp8,0,2.3514453570048013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,float16,0,1.2664319674173992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,fp8,0,1.2673760255177815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,0,1,fp8,fp8,0,1.2768373489379883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,float16,0,1.154538631439209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,fp8,0,1.1824159622192383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,0,1,fp8,fp8,0,1.054645299911499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,float16,0,1.1852746804555256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,fp8,0,1.1618346373240154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,0,1,fp8,fp8,0,1.0578506787618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,float16,0,1.1996906598409016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,fp8,0,1.1701599756876628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,0,1,fp8,fp8,0,1.0673706531524658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,float16,0,1.17630402247111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,fp8,0,1.2162293593088787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,0,1,fp8,fp8,0,2.072960058848063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,fp8,0,0.6971840063730875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,0,1,fp8,fp8,0,0.6202666759490967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,float16,0,0.6222826639811198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,fp8,0,0.6170719861984253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,0,1,fp8,fp8,0,0.5693120161692301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,float16,0,0.619482676188151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,fp8,0,0.6237599849700928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,float16,0,0.667248010635376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,0,1,fp8,fp8,0,0.5725013415018717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,float16,0,0.6293439865112305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,fp8,0,0.627781351407369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,0,1,fp8,fp8,0,0.5745333433151245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,float16,0,0.6307359933853149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,fp8,0,0.6289333502451578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,0,1,fp8,fp8,0,0.5808000167210897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,float16,0,0.37995731830596924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,fp8,0,0.3791733185450236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,0,1,fp8,fp8,0,1.0786666870117188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,float16,0,0.3497759898503621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,fp8,0,0.3468159834543864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,0,1,fp8,fp8,0,0.32733867565790814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,float16,0,0.3521759907404582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,fp8,0,0.3491199811299642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,float16,0,0.35632534821828205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,fp8,0,0.35254398981730145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,0,1,fp8,fp8,0,0.3287573258082072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,float16,0,0.35651731491088867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,fp8,0,0.3609653313954671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,0,1,fp8,fp8,0,0.3328533371289571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,float16,0,0.23227200905481973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,fp8,0,0.23249600330988565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,0,1,fp8,fp8,0,0.21863466501235962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,float16,0,0.21744000911712646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,fp8,0,0.21758399407068887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,0,1,fp8,fp8,0,0.20179200172424316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,float16,0,0.21753599246342978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,fp8,0,0.21821866432825723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,0,1,fp8,fp8,0,0.2017973264058431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,float16,0,0.21887999773025513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,fp8,0,0.2171306610107422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,0,1,fp8,fp8,0,0.2018400033315023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,float16,0,0.2180160085360209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,fp8,0,0.22059732675552368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,0,1,fp8,fp8,0,0.20764267444610596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,0,1,fp8,fp8,0,0.32626134157180786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,0,1,fp8,fp8,0,0.3525386651357015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,fp8,0,2.8977654774983725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,0,1,fp8,fp8,0,2.5669120152791343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,float16,0,2.8115412394205728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,float16,0,2.8137547175089517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,float16,0,2.862506548563639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,fp8,0,2.9097865422566733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,fp8,0,2.931002616882324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,0,1,fp8,fp8,0,2.5967893600463867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,float16,0,2.957482655843099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,fp8,0,2.9878721237182617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,0,1,fp8,fp8,0,2.5832106272379556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,float16,0,1.6307199796040852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,0,1,fp8,fp8,0,2.6395467122395835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,fp8,0,1.7146719296773274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,float16,0,1.429968039194743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,fp8,0,1.4390506744384766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,0,1,fp8,fp8,0,1.344037373860677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,0,1,fp8,fp8,0,1.458778699239095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,fp8,0,1.4276000658671062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,0,1,fp8,fp8,0,1.306874672571818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,float16,0,1.4310612678527832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,fp8,0,1.4341546694437664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,0,1,fp8,fp8,0,1.3285280068715413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,float16,0,1.4474080403645833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,fp8,0,1.4533599217732747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,0,1,fp8,fp8,0,1.3393333752950032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,fp8,0,0.821664015452067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,float16,0,1.4523146947224934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,0,1,fp8,fp8,0,0.7643199761708578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,float16,0,0.744879961013794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,fp8,0,0.7662239869435629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,0,1,fp8,fp8,0,0.6843840281168619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,float16,0,0.7415680090586344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,fp8,0,0.7561600208282471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,0,1,fp8,fp8,0,0.688202699025472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,float16,0,0.8118506272633871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,fp8,0,0.7606986363728842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,0,1,fp8,fp8,0,0.6917546590169271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,float16,0,0.7619573275248209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,fp8,0,0.7695840199788412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,float16,0,0.4431840181350708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,fp8,0,0.4525493383407593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,0,1,fp8,fp8,0,0.41462401549021405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,float16,0,0.4078986644744873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,fp8,0,0.40749335289001465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,float16,0,0.7497759660085043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,float16,0,0.4131253163019816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,fp8,0,0.4079573154449463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,0,1,fp8,fp8,0,0.37918933232625324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,0,1,fp8,fp8,0,0.7039999961853027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,fp8,0,0.41278398036956787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,0,1,fp8,fp8,0,0.3816213210423787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,float16,0,0.4178239901860555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,fp8,0,0.4198880195617676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,0,1,fp8,fp8,0,0.3862186670303345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,float16,0,0.2560906608899434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,fp8,0,0.26175467173258465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,0,1,fp8,fp8,0,0.24151466290156046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,float16,0,0.23199466864267984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,fp8,0,0.23334399859110513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,0,1,fp8,fp8,0,0.21981332699457803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,float16,0,0.23497066895167032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,fp8,0,0.23474133014678955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,0,1,fp8,fp8,0,0.22184000412623087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,float16,0,0.2353066603342692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,fp8,0,0.23673067490259805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,0,1,fp8,fp8,0,0.22405334313710532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,float16,0,0.24047466119130453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,0,1,fp8,fp8,0,0.376362681388855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,0,1,fp8,fp8,0,0.2262666622797648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,float16,0,0.1625333329041799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,fp8,0,0.16198399662971497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,0,1,fp8,fp8,0,0.15493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,float16,0,0.14958399534225464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,fp8,0,0.15043200055758157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,0,1,fp8,fp8,0,0.14307199915250143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,float16,0,0.1504853367805481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,fp8,0,0.1513866682847341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,0,1,fp8,fp8,0,0.14241066575050354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,float16,0,0.14969066778818765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,fp8,0,0.15223466356595358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,0,1,fp8,fp8,0,0.14194132884343466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,float16,0,0.15161066253980002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,fp8,0,0.24193066358566284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,0,1,fp8,fp8,0,0.1423733333746592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,float16,0,0.41044267018636066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,float16,0,3.0720427831014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,fp8,0,0.15320533514022827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,0,1,fp8,fp8,0,2.80403741200765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,float16,0,3.0748586654663086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,fp8,0,3.088298797607422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,0,1,fp8,fp8,0,2.8239625295003257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,float16,0,3.165429433186849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,fp8,0,3.133994738260905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,0,1,fp8,fp8,0,2.8478027979532876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,float16,0,3.1960268020629883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,fp8,0,3.204906781514486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,0,1,fp8,fp8,0,2.9106613794962564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,float16,0,1.727669397989909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,fp8,0,1.7382399241129558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,fp8,0,3.1179466247558594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,float16,0,1.5206133524576824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,fp8,0,1.5382827123006184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,0,1,fp8,fp8,0,1.4099359512329102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,0,1,fp8,fp8,0,1.6158026059468586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,fp8,0,1.5418027242024739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,0,1,fp8,fp8,0,1.419317404429118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,float16,0,1.5346399943033855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,fp8,0,1.554752031962077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,0,1,fp8,fp8,0,1.4288959503173828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,float16,0,1.5359306335449219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,float16,0,1.5610987345377605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,fp8,0,1.5834827423095703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,float16,0,0.885866641998291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,fp8,0,0.8988640308380127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,0,1,fp8,fp8,0,0.8325760364532471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,float16,0,0.784554640452067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,fp8,0,0.7931733131408691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,0,1,fp8,fp8,0,0.728928009668986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,float16,0,0.799786647160848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,fp8,0,0.7964746952056885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,0,1,fp8,fp8,0,0.7353119850158691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,float16,0,0.7940320173899332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,fp8,0,0.8050933678944906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,0,1,fp8,fp8,0,0.7377013365427653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,float16,0,0.8113280137379965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,fp8,0,0.8145173390706381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,0,1,fp8,fp8,0,0.7528426647186279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,fp8,0,0.47647468249003094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,0,1,fp8,fp8,0,0.4437280098597209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,fp8,0,0.4206240177154541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,0,1,fp8,fp8,0,0.3906666835149129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,0,1,fp8,fp8,0,1.4575573603312175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,float16,0,0.42265601952870685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,float16,0,0.46587200959523517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,0,1,fp8,fp8,0,0.3941386540730794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,float16,0,0.42660268147786456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,fp8,0,0.4328746795654297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,0,1,fp8,fp8,0,0.3949120044708252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,float16,0,0.43643200397491455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,fp8,0,0.43536531925201416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,0,1,fp8,fp8,0,0.40277334054311115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,float16,0,0.26023467381795246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,fp8,0,0.26685865720113117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,fp8,0,0.4352373282114665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,float16,0,0.23407467206319174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,fp8,0,0.2376693288485209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,float16,0,0.4201226631800334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,float16,0,0.23899734020233154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,fp8,0,0.2342080076535543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,0,1,fp8,fp8,0,0.2221119999885559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,float16,0,0.23678932587305704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,fp8,0,0.2421919902165731
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,0,1,fp8,fp8,0,0.22405334313710532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,float16,0,0.24683199326197305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,fp8,0,0.24288533131281534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,0,1,fp8,fp8,0,0.2467893362045288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,float16,0,0.15383467078208923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,fp8,0,0.1579200029373169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,0,1,fp8,fp8,0,0.1497066617012024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,float16,0,0.13658666610717773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,fp8,0,0.13715733091036478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,0,1,fp8,fp8,0,0.12813867131868997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,float16,0,0.13662399848302206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,fp8,0,0.13715199629465738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,0,1,fp8,fp8,0,0.12833600242932638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,float16,0,0.13686933120091757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,fp8,0,0.13755200306574503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,0,1,fp8,fp8,0,0.13064000010490417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,float16,0,0.139984001715978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,0,1,fp8,fp8,0,0.22814400990804037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,float16,0,0.09868266185124715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,fp8,0,0.09980266292889912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,0,1,fp8,fp8,0,0.09815466403961182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,float16,0,0.09497599800427754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,fp8,0,0.09650133053461711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,0,1,fp8,fp8,0,0.09123200178146362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,float16,0,0.09524266918500264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,fp8,0,0.14054933190345764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,fp8,0,0.09521599610646565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,0,1,fp8,fp8,0,0.13686933120091757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,float16,0,0.09639466802279155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,fp8,0,0.09910399715105693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,0,1,fp8,fp8,0,0.09326933821042378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,float16,0,0.09360000491142273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,fp8,0,0.09701866904894511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,0,1,fp8,fp8,0,0.09109333157539368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,float16,0,2.0573760668436685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,fp8,0,2.0451839764912925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,0,1,fp8,fp8,0,0.21980265776316324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,0,1,fp8,fp8,0,1.8826826413472493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,float16,0,2.0479520161946616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,fp8,0,2.0701546669006348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,0,1,fp8,fp8,0,0.09303466478983562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,0,1,fp8,fp8,0,1.8969705899556477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,float16,0,2.0694239934285483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,0,1,fp8,fp8,0,1.9129385948181152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,float16,0,2.126800060272217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,fp8,0,2.109386603037516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,0,1,fp8,fp8,0,1.9532426198323567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,float16,0,1.2077759901682537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,fp8,0,1.1874826749165852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,fp8,0,2.074965318044027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,0,1,fp8,fp8,0,1.1075466473897297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,float16,0,1.0305226643880208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,fp8,0,1.0296320120493572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,0,1,fp8,fp8,0,0.9512853622436523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,float16,0,1.0314559936523438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,fp8,0,1.0405813058217366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,0,1,fp8,fp8,0,0.9568640391031901
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,float16,0,1.0434292952219646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,fp8,0,1.0486400127410889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,0,1,fp8,fp8,0,0.9658453464508057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,float16,0,1.0622453689575195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,fp8,0,1.0722506841023762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,0,1,fp8,fp8,0,0.989840030670166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,float16,0,0.6042293310165405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,fp8,0,0.6215519905090332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,0,1,fp8,fp8,0,0.5752533276875814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,float16,0,0.5314293305079142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,fp8,0,0.5381333430608114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,0,1,fp8,fp8,0,0.49836798508961994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,float16,0,0.5350346565246582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,fp8,0,0.5447413523991903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,0,1,fp8,fp8,0,0.49934399127960205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,float16,0,0.5405280192693075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,fp8,0,0.5506240129470825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,0,1,fp8,fp8,0,0.5041439930597941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,float16,0,0.5528373320897421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,fp8,0,0.5553439855575562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,float16,0,0.32728532950083417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,fp8,0,0.33433600266774494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,0,1,fp8,fp8,0,0.3102560043334961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,float16,0,0.29026132822036743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,fp8,0,0.28990399837493896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,0,1,fp8,fp8,0,0.27060266335805255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,float16,0,0.2882080078125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,fp8,0,0.29445866743723553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,float16,0,0.2918720046679179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,fp8,0,0.29577066500981647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,0,1,fp8,fp8,0,0.27422932783762616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,0,1,fp8,fp8,0,0.5153226852416992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,float16,0,0.30001600583394367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,fp8,0,0.30056534210840863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,0,1,fp8,fp8,0,0.279968003431956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,fp8,0,0.18781334161758423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,0,1,fp8,fp8,0,0.17620799938837686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,float16,0,0.15929599603017172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,fp8,0,0.15829867124557495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,0,1,fp8,fp8,0,0.15246400237083435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,float16,0,0.15870400269826254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,fp8,0,0.1585919956366221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,0,1,fp8,fp8,0,0.2714719971021016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,float16,0,0.16306133071581522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,fp8,0,0.1660160024960836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,0,1,fp8,fp8,0,0.15706132849057516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,float16,0,0.1712053418159485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,fp8,0,0.16876800855000815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,0,1,fp8,fp8,0,0.16197866201400757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,float16,0,0.18516266345977783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,fp8,0,0.11313066879908244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,0,1,fp8,fp8,0,0.10889066259066264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,float16,0,0.09956266482671101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,fp8,0,0.10046933094660442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,0,1,fp8,fp8,0,0.09307733178138733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,float16,0,0.09937066833178203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,fp8,0,0.09966400265693665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,0,1,fp8,fp8,0,0.09508267045021057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,float16,0,0.10084266463915507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,fp8,0,0.10106133421262105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,0,1,fp8,fp8,0,0.0953493316968282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,float16,0,0.10046933094660442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,fp8,0,0.10225600004196167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,0,1,fp8,fp8,0,0.09522133072217305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,float16,0,0.07283199826876323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,fp8,0,0.07410133381684621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,0,1,fp8,fp8,0,0.07246933380762736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,float16,0,0.07049599786599477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,fp8,0,0.07258666555086772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,0,1,fp8,fp8,0,0.0687253326177597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,float16,0,0.07074133555094402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,fp8,0,0.07245866457621257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,0,1,fp8,fp8,0,0.06854400038719177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,float16,0,0.07213866710662842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,fp8,0,0.07270933190981548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,0,1,fp8,fp8,0,0.06885333359241486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,float16,0,0.07067200044790904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,fp8,0,0.07148266832033794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,0,1,fp8,fp8,0,0.06876799960931142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,float16,0,0.11038933197657268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,float16,0,2.1673173904418945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,0,1,fp8,fp8,0,0.15397333105405173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,0,1,fp8,fp8,0,2.1042186419169107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,float16,0,2.197103977203369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,fp8,0,2.167184034983317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,0,1,fp8,fp8,0,2.1272640228271484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,float16,0,2.340208053588867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,fp8,0,2.3149333000183105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,0,1,fp8,fp8,0,2.577850659688314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,fp8,0,2.166762669881185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,float16,0,2.359226703643799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,float16,0,1.2976106802622478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,0,1,fp8,fp8,0,2.564789295196533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,fp8,0,1.280245304107666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,0,1,fp8,fp8,0,1.2893919944763184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,float16,0,1.0932640234629314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,fp8,0,1.0901333491007488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,fp8,0,2.303978602091471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,float16,0,1.1092639764149983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,fp8,0,1.0982080300649006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,0,1,fp8,fp8,0,1.079482634862264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,float16,0,1.165013313293457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,fp8,0,1.1436053117116292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,0,1,fp8,fp8,0,1.270565350850423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,0,1,fp8,fp8,0,1.0652213096618652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,float16,0,1.1691253185272217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,fp8,0,1.1403253078460693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,float16,0,0.6450666586558024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,0,1,fp8,fp8,0,1.2719146410624187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,fp8,0,0.6416639884312948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,0,1,fp8,fp8,0,0.6474719842274984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,float16,0,0.570522665977478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,fp8,0,0.5620266596476237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,0,1,fp8,fp8,0,0.5440906683603922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,float16,0,0.5760960181554159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,fp8,0,0.5669546524683634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,0,1,fp8,fp8,0,0.5509920120239258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,float16,0,0.5963360071182251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,fp8,0,0.5862933397293091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,0,1,fp8,fp8,0,0.6254613399505615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,float16,0,0.6000800132751465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,fp8,0,0.5852959950764974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,float16,0,0.3441653251647949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,fp8,0,0.33273067077000934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,0,1,fp8,fp8,0,0.33850133419036865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,float16,0,0.30058666070302326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,fp8,0,0.2959786653518677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,0,1,fp8,fp8,0,0.28328533967336017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,float16,0,0.3033546606699626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,0,1,fp8,fp8,0,0.28780267635981244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,float16,0,0.3137706716855367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,fp8,0,0.3118720054626465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,0,1,fp8,fp8,0,0.31100799640019733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,float16,0,0.31564799944559735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,0,1,fp8,fp8,0,0.6310666799545288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,fp8,0,0.30991466840108234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,0,1,fp8,fp8,0,0.31496532758076984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,fp8,0,0.1832480033238729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,float16,0,0.16125866770744324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,fp8,0,0.2976693312327067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,fp8,0,0.1601653297742208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,0,1,fp8,fp8,0,0.1527359982331594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,float16,0,0.16511999567349753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,fp8,0,0.16075199842453003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,0,1,fp8,fp8,0,0.15547733505566916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,float16,0,0.16796799500783285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,fp8,0,0.16915200153986612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,0,1,fp8,fp8,0,0.16543466846148172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,float16,0,0.17122133572896323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,fp8,0,0.16941332817077637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,0,1,fp8,fp8,0,0.18246400356292725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,float16,0,0.10579199592272441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,fp8,0,0.10662399729092915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,0,1,fp8,fp8,0,0.10434666275978088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,float16,0,0.09085333347320557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,fp8,0,0.0890826682249705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,0,1,fp8,fp8,0,0.086709330479304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,float16,0,0.0899733304977417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,fp8,0,0.09101866682370503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,0,1,fp8,fp8,0,0.08573866883913676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,float16,0,0.09537600477536519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,fp8,0,0.09320533275604248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,0,1,fp8,fp8,0,0.09386666615804036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,float16,0,0.09477333227793376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,float16,0,0.18241065740585327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,0,1,fp8,fp8,0,0.09500799576441447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,float16,0,0.05892266829808553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,fp8,0,0.06028800209363302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,0,1,fp8,fp8,0,0.062496001521746315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,float16,0,0.05598933498064677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,fp8,0,0.0565280020236969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,0,1,fp8,fp8,0,0.05385066568851471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,float16,0,0.05776533484458923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,fp8,0,0.05829866727193197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,0,1,fp8,fp8,0,0.054085334142049156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,float16,0,0.058287998040517174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,fp8,0,0.057999998331069946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,0,1,fp8,fp8,0,0.056320001681645714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,float16,0,0.058229332168896995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,fp8,0,0.057855998476346336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,0,1,fp8,fp8,0,0.05717866619427999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,fp8,0,0.09542399644851685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,fp8,0,0.038005332152048744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,0,1,fp8,fp8,0,0.03937600056330363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,float16,0,0.03777066618204117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,fp8,0,0.03736000011364619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,0,1,fp8,fp8,0,0.03734400123357773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,float16,0,0.038533332447210945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,fp8,0,0.0378506655494372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,0,1,fp8,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,float16,0,0.037871999045213066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,fp8,0,0.03955200066169103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,0,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,float16,0,0.03977066775163015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,fp8,0,0.039434666434923805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,0,1,fp8,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,float16,0,0.03880000114440918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,0,1,fp8,fp8,0,0.16908266146977743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,0,1,float16,float16,0,1.8204639752705891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,0,1,float16,fp8,0,1.8193119366963704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,0,1,fp8,fp8,0,1.8085707028706868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,0,1,float16,float16,0,1.8636693954467773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,0,1,float16,fp8,0,1.8313172658284504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,0,1,fp8,fp8,0,1.828879992167155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,0,1,float16,float16,0,1.9834240277608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,0,1,float16,fp8,0,1.9765653610229492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,0,1,fp8,fp8,0,2.274282614390055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,0,1,float16,float16,0,1.9958720207214355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,0,1,float16,fp8,0,1.9766079584757488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,0,1,float16,float16,0,1.1052213509877522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,0,1,float16,fp8,0,1.102277358373006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,0,1,fp8,fp8,0,1.1339413324991863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,0,1,float16,float16,0,0.9285013675689697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,0,1,float16,fp8,0,0.9242613315582275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,0,1,fp8,fp8,0,0.9122560024261475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,0,1,float16,float16,0,0.9406773249308268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,0,1,fp8,fp8,0,2.269925276438395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,0,1,float16,fp8,0,0.9332053661346436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,0,1,fp8,fp8,0,0.9254346688588461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,0,1,float16,float16,0,0.9917120138804117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,0,1,float16,fp8,0,0.9777866999308268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,0,1,fp8,fp8,0,1.1189226309458415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,0,1,float16,float16,0,0.9925013383229574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,0,1,float16,fp8,0,0.9725759824117025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,0,1,float16,float16,0,0.555738647778829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,0,1,fp8,fp8,0,1.1227946281433105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,0,1,fp8,fp8,0,0.569594661394755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,0,1,float16,fp8,0,0.4779093265533447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,0,1,fp8,fp8,0,0.46565866470336914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,0,1,float16,float16,0,0.48765865961710614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,0,1,float16,fp8,0,0.4814666509628296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,0,1,fp8,fp8,0,0.4730293353398641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,0,1,float16,float16,0,0.5027626752853394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,0,1,float16,fp8,0,0.5541813373565674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,0,1,float16,float16,0,0.47856001059214276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,0,1,float16,float16,0,0.5065173308054606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,0,1,float16,fp8,0,0.4992426633834839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,0,1,fp8,fp8,0,0.5503466526667277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,0,1,float16,float16,0,0.2901279926300049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,0,1,float16,fp8,0,0.2887519995371501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,0,1,fp8,fp8,0,0.29522132873535156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,0,1,float16,float16,0,0.2518240014712016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,0,1,float16,fp8,0,0.24918399254480997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,0,1,float16,fp8,0,0.5025920073191324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,0,1,float16,float16,0,0.25463465849558514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,0,1,fp8,fp8,0,0.5523946682612101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,0,1,fp8,fp8,0,0.24832000335057577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,0,1,float16,float16,0,0.26521599292755127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,0,1,float16,fp8,0,0.2627466718355815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,0,1,fp8,fp8,0,0.2690933346748352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,0,1,float16,float16,0,0.2624533375104268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,0,1,float16,fp8,0,0.26018667221069336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,0,1,fp8,fp8,0,0.2630239923795064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,0,1,float16,fp8,0,0.15497066577275595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,0,1,fp8,fp8,0,0.2413813273111979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,0,1,fp8,fp8,0,0.1597493290901184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,0,1,float16,float16,0,0.13481600085894266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,0,1,float16,fp8,0,0.13379200299580893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,0,1,fp8,fp8,0,0.12979732950528464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,0,1,float16,float16,0,0.13649066289265951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,0,1,float16,fp8,0,0.1364479959011078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,0,1,fp8,fp8,0,0.137061337629954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,0,1,float16,float16,0,0.1432906687259674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,0,1,float16,fp8,0,0.1420906682809194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,0,1,fp8,fp8,0,0.14337600270907083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,0,1,float16,float16,0,0.14250133434931436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,0,1,float16,fp8,0,0.1426346699396769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,0,1,fp8,fp8,0,0.13987200458844504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,0,1,float16,float16,0,0.0902453362941742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,0,1,float16,fp8,0,0.08966933687527974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,0,1,fp8,fp8,0,0.09150933225949605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,0,1,float16,float16,0,0.07484266658624013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,0,1,float16,fp8,0,0.07543466488520305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,0,1,fp8,fp8,0,0.0728959987560908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,0,1,float16,float16,0,0.07714666426181793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,0,1,float16,fp8,0,0.07481066882610321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,0,1,fp8,fp8,0,0.07283733288447063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,0,1,float16,float16,0,0.07879466811815898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,0,1,float16,fp8,0,0.07969599962234497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,0,1,fp8,fp8,0,0.08104533453782399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,0,1,float16,float16,0,0.07892799874146779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,0,1,float16,fp8,0,0.07878933350245158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,0,1,fp8,fp8,0,0.08147199948628743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,0,1,float16,fp8,0,0.05166399975617727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,0,1,fp8,fp8,0,0.05412800113360087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,0,1,float16,float16,0,0.04783466458320618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,0,1,float16,fp8,0,0.0470719983180364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,0,1,fp8,fp8,0,0.046223998069763184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,0,1,float16,float16,0,0.04806933303674062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,0,1,float16,fp8,0,0.048298666874567665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,0,1,fp8,fp8,0,0.046154667933781944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,0,1,float16,float16,0,0.15871466199556986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,0,1,float16,fp8,0,0.04901866614818573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,0,1,fp8,fp8,0,0.047685335079828896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,0,1,float16,float16,0,0.049770668148994446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,0,1,float16,fp8,0,0.04881600042184194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,0,1,fp8,fp8,0,0.050197333097457886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,0,1,float16,float16,0,0.03364266703526179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,0,1,float16,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,0,1,fp8,fp8,0,0.033743999898433685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,0,1,float16,float16,0,0.031258667508761086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,0,1,float16,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,0,1,fp8,fp8,0,0.030741333961486816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,0,1,float16,fp8,0,0.2553546627362569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,0,1,float16,float16,0,0.03155199935038885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,0,1,float16,fp8,0,0.03183999905983607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,0,1,fp8,fp8,0,0.03133333226044973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,0,1,float16,float16,0,0.031530665854612984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,0,1,float16,fp8,0,0.03320533285538355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,0,1,fp8,fp8,0,0.033701332906881966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,0,1,float16,float16,0,0.033615998923778534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,0,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,0,1,fp8,fp8,0,0.03316800047953924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,0,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,0,1,float16,fp8,0,0.029152000943819683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,0,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,0,1,float16,float16,0,0.027114666998386383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,0,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,0,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,0,1,float16,float16,0,0.02554133286078771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,0,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,0,1,fp8,fp8,0,0.026122666895389557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,0,1,float16,fp8,0,0.027749332288901012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,0,1,fp8,fp8,0,0.027690666417280834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,0,1,float16,float16,0,0.025978667040665943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,0,1,float16,fp8,0,0.025792000194390614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,0,1,fp8,fp8,0,0.027615999182065327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,0,1,float16,float16,0,0.05005866785844167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,0,1,float16,fp8,0,0.8287573655446371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,0,1,fp8,fp8,0,0.8215146859486898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,0,1,float16,float16,0,0.8518293698628744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,0,1,float16,float16,0,0.05026133358478546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,0,1,float16,fp8,0,0.8457120259602865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,0,1,fp8,fp8,0,0.8469173113505045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,0,1,float16,float16,0,0.8370560010274252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,0,1,float16,float16,0,0.8985919952392578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,0,1,float16,fp8,0,0.8857279618581136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,0,1,fp8,fp8,0,1.044533332188924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,0,1,float16,float16,0,0.9157493114471436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,0,1,float16,fp8,0,0.8778080145517985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,0,1,fp8,fp8,0,1.045573314030965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,0,1,float16,fp8,0,0.5014133453369141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,0,1,fp8,fp8,0,0.5282400051752726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,0,1,float16,float16,0,0.4312906662623088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,0,1,float16,fp8,0,0.44597868124643963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,0,1,fp8,fp8,0,0.4248480002085368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,0,1,float16,float16,0,0.43754132588704425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,0,1,float16,fp8,0,0.4336373408635457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,0,1,fp8,fp8,0,0.4332053263982137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,0,1,float16,float16,0,0.4569600025812785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,0,1,float16,fp8,0,0.44946134090423584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,0,1,fp8,fp8,0,0.5052693287531534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,0,1,float16,float16,0,0.5077226559321085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,0,1,float16,fp8,0,0.4771039883295695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,0,1,float16,float16,0,0.26895999908447266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,0,1,float16,fp8,0,0.2642880082130432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,0,1,fp8,fp8,0,0.2751680016517639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,0,1,float16,float16,0,0.22633065780003866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,0,1,float16,fp8,0,0.22449066241582236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,0,1,fp8,fp8,0,0.21995733181635538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,0,1,float16,float16,0,0.23021332422892252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,0,1,float16,fp8,0,0.22851733366648355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,0,1,fp8,fp8,0,0.22791467110315958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,0,1,float16,float16,0,0.2401813268661499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,0,1,float16,fp8,0,0.23799467086791992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,0,1,fp8,fp8,0,0.2499199906984965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,0,1,fp8,fp8,0,0.46908267339070636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,0,1,float16,float16,0,0.24157865842183432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,0,1,float16,fp8,0,0.23827733596165976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,0,1,fp8,fp8,0,0.2395306626955668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,0,1,float16,fp8,0,0.14379200339317322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,0,1,fp8,fp8,0,0.14818132917086282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,0,1,float16,float16,0,0.11972266435623169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,0,1,float16,fp8,0,0.11884267131487529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,0,1,fp8,fp8,0,0.11552000045776367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,0,1,float16,float16,0,0.12112533052762349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,0,1,float16,fp8,0,0.11973333358764648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,0,1,fp8,fp8,0,0.1204746663570404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,0,1,float16,float16,0,0.12784533699353537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,0,1,float16,fp8,0,0.1260533332824707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,0,1,fp8,fp8,0,0.13166933258374533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,0,1,float16,float16,0,0.12935466567675272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,0,1,float16,fp8,0,0.13013866543769836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,0,1,fp8,fp8,0,0.12962133685747781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,0,1,float16,float16,0,0.0851039985815684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,0,1,fp8,fp8,0,0.08514133095741272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,0,1,float16,float16,0,0.06844800213972728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,0,1,float16,fp8,0,0.06815466781457265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,0,1,fp8,fp8,0,0.0661653329928716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,0,1,float16,float16,0,0.06841066479682922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,0,1,float16,float16,0,0.46007466316223145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,0,1,float16,fp8,0,0.07012266914049785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,0,1,fp8,fp8,0,0.06667733192443848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,0,1,float16,float16,0,0.07333333293596904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,0,1,float16,fp8,0,0.07251733541488647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,0,1,fp8,fp8,0,0.07468266785144806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,0,1,float16,float16,0,0.07246933380762736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,0,1,float16,fp8,0,0.07216000060240428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,0,1,fp8,fp8,0,0.07688533266385396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,0,1,float16,float16,0,0.045909335215886436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,0,1,float16,fp8,0,0.04569066564242045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,0,1,fp8,fp8,0,0.04810666541258494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,0,1,float16,fp8,0,0.0414986660083135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,0,1,fp8,fp8,0,0.040021332601706185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,0,1,float16,float16,0,0.04137066751718521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,0,1,float16,fp8,0,0.04165866722663244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,0,1,fp8,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,0,1,float16,float16,0,0.0439573327700297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,0,1,float16,fp8,0,0.04400533437728882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,0,1,fp8,fp8,0,0.043738668163617454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,0,1,float16,float16,0,0.043525333205858864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,0,1,float16,fp8,0,0.04331733286380768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,0,1,fp8,fp8,0,0.04390400151411692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,0,1,float16,float16,0,0.029898665845394135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,0,1,float16,fp8,0,0.031685332457224526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,0,1,fp8,fp8,0,0.031125334401925404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,0,1,float16,float16,0,0.02757333219051361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,0,1,float16,fp8,0,0.027679999669392902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,0,1,float16,float16,0,0.14552533626556396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,0,1,float16,float16,0,0.027679999669392902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,0,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,0,1,fp8,fp8,0,0.030320001145203907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,0,1,float16,fp8,0,0.03046400099992752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,0,1,fp8,fp8,0,0.029909332593282063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,0,1,float16,float16,0,0.02996266633272171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,0,1,float16,fp8,0,0.030421334008375805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,0,1,fp8,fp8,0,0.031370667119820915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,0,1,float16,fp8,0,0.08437866965929668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,0,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,0,1,float16,float16,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,0,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,0,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,0,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,0,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,0,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,0,1,fp8,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,0,1,float16,float16,0,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,0,1,float16,fp8,0,0.024682665864626568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,0,1,float16,float16,0,0.029109333952267964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,0,1,fp8,fp8,0,0.02384000023206075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,0,1,float16,float16,0,0.020810666183630627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,0,1,float16,float16,0,0.022005334496498108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,0,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,0,1,fp8,fp8,0,0.0296426663796107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,0,1,float16,fp8,0,0.02181333303451538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,0,1,fp8,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,0,1,float16,float16,0,0.4644426504770915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,0,1,float16,fp8,0,0.4591093460718791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,0,1,fp8,fp8,0,0.459445317586263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,0,1,float16,float16,0,0.49526933828989667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,0,1,float16,fp8,0,0.4703679879506429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,0,1,fp8,fp8,0,0.4657919804255168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,0,1,float16,float16,0,0.4947093327840169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,0,1,float16,fp8,0,0.491706649462382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,0,1,fp8,fp8,0,0.5326879819234213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,0,1,float16,float16,0,0.49768535296122235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,0,1,float16,fp8,0,0.49296001593271893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,0,1,fp8,fp8,0,0.542960007985433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,0,1,float16,float16,0,0.28171199560165405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,0,1,float16,fp8,0,0.2780960003534953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,0,1,fp8,fp8,0,0.28914666175842285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,0,1,float16,float16,0,0.2411359945933024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,0,1,float16,fp8,0,0.2385866641998291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,0,1,fp8,fp8,0,0.23601067066192627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,0,1,float16,float16,0,0.2463093400001526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,0,1,float16,fp8,0,0.2442880074183146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,0,1,fp8,fp8,0,0.2405760089556376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,0,1,float16,float16,0,0.2561653256416321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,0,1,float16,fp8,0,0.2555786569913228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,0,1,fp8,fp8,0,0.26095465819040936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,0,1,float16,float16,0,0.25729066133499146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,0,1,float16,fp8,0,0.2549706697463989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,0,1,float16,float16,0,0.1511679987112681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,0,1,float16,fp8,0,0.15052266915639242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,0,1,fp8,fp8,0,0.1558986703554789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,0,1,float16,float16,0,0.12984533111254373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,0,1,float16,fp8,0,0.12797866264979044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,0,1,fp8,fp8,0,0.13130133350690207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,0,1,float16,float16,0,0.13142933448155722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,0,1,float16,fp8,0,0.13117866714795431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,0,1,fp8,fp8,0,0.12959466377894083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,0,1,float16,float16,0,0.1383840044339498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,0,1,float16,fp8,0,0.13565333684285483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,0,1,fp8,fp8,0,0.13910399874051413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,0,1,float16,float16,0,0.13769599795341492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,0,1,float16,fp8,0,0.13846400380134583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,0,1,fp8,fp8,0,0.13853866855303446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,0,1,float16,fp8,0,0.08463999629020691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,0,1,float16,float16,0,0.02383466561635335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,0,1,fp8,fp8,0,0.08603733777999878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,0,1,float16,float16,0,0.07055999835332234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,0,1,float16,fp8,0,0.07039999961853027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,0,1,fp8,fp8,0,0.06870399912198384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,0,1,float16,float16,0,0.07268266876538594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,0,1,float16,float16,0,0.0849173367023468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,0,1,float16,fp8,0,0.07125333448251088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,0,1,fp8,fp8,0,0.06831466654936473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,0,1,float16,float16,0,0.07487466434637706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,0,1,float16,fp8,0,0.07354133327802022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,0,1,fp8,fp8,0,0.07550933460394542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,0,1,float16,float16,0,0.07540800174077351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,0,1,float16,fp8,0,0.07540800174077351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,0,1,fp8,fp8,0,0.0780320018529892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,0,1,float16,float16,0,0.047744000951449074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,0,1,float16,fp8,0,0.04839999973773956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,0,1,fp8,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,0,1,float16,float16,0,0.04437333345413208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,0,1,float16,fp8,0,0.04368533194065094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,0,1,fp8,fp8,0,0.04390400151411692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,0,1,float16,float16,0,0.043663998444875084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,0,1,float16,fp8,0,0.04432533184687296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,0,1,float16,float16,0,0.045925334095954895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,0,1,float16,fp8,0,0.04589866598447164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,0,1,fp8,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,0,1,float16,float16,0,0.04576000074545542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,0,1,float16,fp8,0,0.046165332198143005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,0,1,float16,float16,0,0.03124266614516576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,0,1,float16,fp8,0,0.031770666440327965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,0,1,fp8,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,0,1,float16,float16,0,0.029711998999118805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,0,1,float16,fp8,0,0.02974933385848999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,0,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,0,1,float16,float16,0,0.02924266705910365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,0,1,float16,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,0,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,0,1,float16,float16,0,0.031002665559450786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,0,1,float16,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,0,1,fp8,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,0,1,fp8,fp8,0,0.26691200335820514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,0,1,fp8,fp8,0,0.03165333221356074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,0,1,fp8,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,0,1,fp8,fp8,0,0.04642133414745331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,0,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,0,1,fp8,fp8,0,0.020869334538777668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,0,1,float16,float16,0,0.021546666820844013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,0,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,0,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,0,1,float16,float16,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,0,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,0,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,0,1,float16,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,0,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,0,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,0,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,0,1,float16,float16,0,0.03126933425664902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,0,1,float16,float16,0,0.01676799977819125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,0,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,0,1,float16,float16,0,0.01773333301146825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,0,1,fp8,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,0,1,float16,float16,0,0.302784005800883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,0,1,float16,fp8,0,0.3035573363304138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,0,1,fp8,fp8,0,0.3052639961242676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,0,1,float16,float16,0,0.3089066743850708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,0,1,float16,fp8,0,0.3049439986546834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,0,1,fp8,fp8,0,0.30452799797058105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,0,1,float16,float16,0,0.3258986671765645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,0,1,float16,fp8,0,0.3167733351389567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,0,1,fp8,fp8,0,0.3253653248151143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,0,1,float16,float16,0,0.3246346712112427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,0,1,float16,fp8,0,0.31752000252405804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,0,1,fp8,fp8,0,0.3280106584231059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,0,1,float16,float16,0,0.18081067005793253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,0,1,float16,fp8,0,0.17863466342290243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,0,1,float16,float16,0,0.15956800182660422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,0,1,float16,fp8,0,0.15969600280125937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,0,1,fp8,fp8,0,0.15878400206565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,0,1,float16,float16,0,0.16166399916013083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,0,1,float16,fp8,0,0.16082666317621866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,0,1,fp8,fp8,0,0.1590720017751058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,0,1,float16,float16,0,0.1688693364461263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,0,1,float16,fp8,0,0.1672053337097168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,0,1,fp8,fp8,0,0.16961065928141275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,0,1,float16,float16,0,0.1693333387374878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,0,1,float16,fp8,0,0.16852800051371256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,0,1,fp8,fp8,0,0.18521066506703696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,0,1,float16,float16,0,0.10097066561381023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,0,1,float16,fp8,0,0.09949866930643718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,0,1,float16,float16,0,0.08922132849693298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,0,1,float16,fp8,0,0.08893866340319316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,0,1,fp8,fp8,0,0.08686400453249614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,0,1,float16,float16,0,0.08914666374524434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,0,1,fp8,fp8,0,0.08574933807055156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,0,1,float16,float16,0,0.09301867087682088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,0,1,float16,fp8,0,0.09131200114885966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,0,1,fp8,fp8,0,0.1032319962978363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,0,1,fp8,fp8,0,0.09303999940554301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,0,1,float16,float16,0,0.09339732925097148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,0,1,float16,fp8,0,0.09240532914797465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,0,1,fp8,fp8,0,0.09345066547393799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,0,1,float16,fp8,0,0.05258133510748545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,0,1,fp8,fp8,0,0.05574400226275126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,0,1,float16,float16,0,0.05009066561857859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,0,1,float16,fp8,0,0.05198933184146881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,0,1,fp8,fp8,0,0.1713493267695109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,0,1,fp8,fp8,0,0.04990933338801066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,0,1,float16,float16,0,0.051856001218159996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,0,1,float16,fp8,0,0.051738664507865906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,0,1,fp8,fp8,0,0.050213331977526345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,0,1,float16,float16,0,0.0537120004494985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,0,1,float16,fp8,0,0.05236266553401947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,0,1,fp8,fp8,0,0.05204799771308899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,0,1,float16,float16,0,0.052149335543314614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,0,1,float16,fp8,0,0.05213866631189982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,0,1,fp8,fp8,0,0.051829333106676735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,0,1,float16,float16,0,0.037530665596326195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,0,1,float16,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,0,1,fp8,fp8,0,0.03634133438269297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,0,1,float16,float16,0,0.03345600018898646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,0,1,float16,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,0,1,fp8,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,0,1,float16,float16,0,0.035536001125971474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,0,1,float16,fp8,0,0.035445332527160645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,0,1,fp8,fp8,0,0.03338133295377096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,0,1,float16,float16,0,0.03596800069014231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,0,1,float16,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,0,1,fp8,fp8,0,0.03551999976237615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,0,1,float16,float16,0,0.03576533248027166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,0,1,float16,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,0,1,fp8,fp8,0,0.03589333345492681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,0,1,float16,float16,0,0.025333332518736523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,0,1,float16,fp8,0,0.024090667565663654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,0,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,0,1,float16,float16,0,0.023226665953795116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,0,1,float16,fp8,0,0.023728000621000927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,0,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,0,1,float16,float16,0,0.023546665906906128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,0,1,float16,fp8,0,0.024821333587169647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,0,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,0,1,float16,float16,0,0.025285333395004272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,0,1,float16,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,0,1,fp8,fp8,0,0.024911999702453613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,0,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,0,1,fp8,fp8,0,0.02565866708755493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,0,1,float16,float16,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,0,1,float16,fp8,0,0.018751999984184902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,0,1,fp8,fp8,0,0.018383999665578205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,0,1,float16,float16,0,0.016176000237464905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,0,1,float16,float16,0,0.05403199791908264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,0,1,float16,fp8,0,0.08832533160845439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,0,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,0,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,0,1,fp8,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,0,1,float16,float16,0,0.2248106598854065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,0,1,float16,fp8,0,0.22411733865737915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,0,1,fp8,fp8,0,0.22696532805760702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,0,1,float16,float16,0,0.2251573403676351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,0,1,float16,fp8,0,0.224346657594045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,0,1,fp8,fp8,0,0.22756266593933105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,0,1,fp8,fp8,0,0.015743999431530636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,0,1,float16,fp8,0,0.2323360045750936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,0,1,fp8,fp8,0,0.23909332354863486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,0,1,float16,float16,0,0.23211199045181274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,0,1,float16,float16,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,0,1,fp8,fp8,0,0.24096532662709555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,0,1,float16,float16,0,0.1323199967543284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,0,1,float16,fp8,0,0.1304586629072825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,0,1,fp8,fp8,0,0.13664533694585165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,0,1,float16,float16,0,0.12016000350316365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,0,1,float16,float16,0,0.23229332764943442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,0,1,fp8,fp8,0,0.11988266309102376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,0,1,float16,float16,0,0.1218826671441396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,0,1,float16,fp8,0,0.11992533008257548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,0,1,fp8,fp8,0,0.11936533451080322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,0,1,float16,fp8,0,0.23297599951426187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,0,1,float16,fp8,0,0.12241599957148235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,0,1,fp8,fp8,0,0.12480533123016357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,0,1,float16,float16,0,0.12385066350301106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,0,1,float16,fp8,0,0.12434132893880208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,0,1,fp8,fp8,0,0.1279093325138092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,0,1,float16,float16,0,0.07223999996980031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,0,1,float16,fp8,0,0.0720960001150767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,0,1,fp8,fp8,0,0.07403733332951863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,0,1,float16,float16,0,0.06710400183995564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,0,1,float16,fp8,0,0.06844266752401988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,0,1,fp8,fp8,0,0.06623466809590657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,0,1,float16,float16,0,0.0689386675755183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,0,1,float16,float16,0,0.12340799967447917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,0,1,fp8,fp8,0,0.06613866488138835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,0,1,float16,float16,0,0.06894933183987935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,0,1,float16,fp8,0,0.06881066660086314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,0,1,fp8,fp8,0,0.07034666836261749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,0,1,float16,float16,0,0.07030400137106578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,0,1,float16,fp8,0,0.07050133248170216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,0,1,fp8,fp8,0,0.07035199801127116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,0,1,float16,float16,0,0.04404266675313314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,0,1,float16,fp8,0,0.043749332427978516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,0,1,fp8,fp8,0,0.04408533374468485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,0,1,float16,float16,0,0.042650664846102394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,0,1,float16,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,0,1,fp8,fp8,0,0.04182399809360504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,0,1,float16,float16,0,0.04192533095677694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,0,1,float16,fp8,0,0.042821332812309265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,0,1,float16,fp8,0,0.06843733290831248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,0,1,float16,float16,0,0.04378133515516917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,0,1,float16,fp8,0,0.04359466830889384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,0,1,fp8,fp8,0,0.043680002291997276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,0,1,float16,float16,0,0.043578664461771645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,0,1,float16,fp8,0,0.04414399961630503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,0,1,fp8,fp8,0,0.043663998444875084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,0,1,float16,float16,0,0.029626667499542236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,0,1,float16,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,0,1,fp8,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,0,1,float16,float16,0,0.027621333797772724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,0,1,float16,fp8,0,0.027786667148272198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,0,1,fp8,fp8,0,0.029232000311215717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,0,1,float16,float16,0,0.02827200045188268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,0,1,float16,fp8,0,0.029850666721661884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,0,1,fp8,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,0,1,float16,float16,0,0.02937600016593933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,0,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,0,1,float16,float16,0,0.02957333376010259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,0,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,0,1,fp8,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,0,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,0,1,fp8,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,0,1,float16,float16,0,0.019839999576409657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,0,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,0,1,float16,float16,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,0,1,fp8,fp8,0,0.019738666713237762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,0,1,float16,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,0,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,0,1,float16,float16,0,0.017583999782800674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,0,1,fp8,fp8,0,0.041840001940727234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,0,1,fp8,fp8,0,0.016666666915019352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,0,1,float16,fp8,0,0.016389333953460056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,0,1,float16,fp8,0,0.1218239963054657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,0,1,fp8,fp8,0,0.016309333344300587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,0,1,float16,float16,0,0.19248000780741373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,0,1,float16,fp8,0,0.19171732664108276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,0,1,float16,float16,0,0.19150932629903158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,0,1,float16,fp8,0,0.016501333564519882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,0,1,fp8,fp8,0,0.19093332688013712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,0,1,float16,float16,0,0.19238932927449545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,0,1,float16,fp8,0,0.1940000057220459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,0,1,fp8,fp8,0,0.19781333208084106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,0,1,float16,float16,0,0.19569067160288492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,0,1,float16,fp8,0,0.195850670337677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,0,1,fp8,fp8,0,0.19793599843978882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,0,1,fp8,fp8,0,0.19155732790629068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,0,1,float16,fp8,0,0.10548266768455505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,0,1,fp8,fp8,0,0.10924800237019856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,0,1,float16,float16,0,0.10139200091362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,0,1,float16,fp8,0,0.10132799545923869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,0,1,fp8,fp8,0,0.10148800412813823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,0,1,float16,float16,0,0.10172800223032634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,0,1,float16,fp8,0,0.10339200496673584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,0,1,fp8,fp8,0,0.10147200028101604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,0,1,float16,float16,0,0.10326932867368062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,0,1,float16,fp8,0,0.10445333520571391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,0,1,fp8,fp8,0,0.10385066270828247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,0,1,float16,float16,0,0.10638399918874104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,0,1,float16,fp8,0,0.1032533347606659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,0,1,fp8,fp8,0,0.1055466632048289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,0,1,float16,fp8,0,0.1912320057551066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,0,1,float16,fp8,0,0.06152533491452535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,0,1,fp8,fp8,0,0.06223999957243601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,0,1,float16,float16,0,0.059978668888409935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,0,1,float16,fp8,0,0.0606879989306132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,0,1,fp8,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,0,1,float16,float16,0,0.06025599936644236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,0,1,float16,fp8,0,0.060266668597857155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,0,1,fp8,fp8,0,0.05858666698137919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,0,1,float16,float16,0,0.0606879989306132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,0,1,float16,fp8,0,0.060271998246510826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,0,1,fp8,fp8,0,0.062261333068211876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,0,1,float16,float16,0,0.10493333141009013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,0,1,float16,fp8,0,0.060965334375699363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,0,1,fp8,fp8,0,0.06237333516279856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,0,1,float16,float16,0,0.06037333110968272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,0,1,fp8,fp8,0,0.039349332451820374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,0,1,float16,float16,0,0.03756266583998998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,0,1,float16,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,0,1,fp8,fp8,0,0.037087999284267426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,0,1,float16,float16,0,0.06061333417892456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,0,1,float16,float16,0,0.037471999724706016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,0,1,float16,fp8,0,0.03758399933576584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,0,1,fp8,fp8,0,0.035562666753927864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,0,1,float16,float16,0,0.03677866607904434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,0,1,float16,fp8,0,0.035605333745479584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,0,1,fp8,fp8,0,0.037632000943024956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,0,1,float16,float16,0,0.03772799919048945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,0,1,float16,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,0,1,fp8,fp8,0,0.0378506655494372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,0,1,float16,float16,0,0.03771200031042099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,0,1,float16,fp8,0,0.03749866783618927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,0,1,fp8,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,0,1,float16,float16,0,0.025557334224383037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,0,1,float16,fp8,0,0.02638400097688039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,0,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,0,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,0,1,fp8,fp8,0,0.02565866708755493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,0,1,float16,float16,0,0.027061333258946735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,0,1,float16,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,0,1,fp8,fp8,0,0.026554666459560394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,0,1,fp8,fp8,0,0.026181332767009735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,0,1,float16,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,0,1,fp8,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,0,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,0,1,float16,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,0,1,fp8,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,0,1,float16,float16,0,0.020261333634455998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,0,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,0,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,0,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,0,1,fp8,fp8,0,0.01613866661985715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,0,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,0,1,float16,fp8,0,0.026250667870044708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,0,1,float16,float16,0,0.015626666446526844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,0,1,fp8,fp8,0,0.01578666642308235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,0,1,float16,float16,0,0.016074666132529575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,0,1,float16,fp8,0,0.01783466711640358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,0,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,0,1,float16,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,0,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,0,1,float16,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,0,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,0,0.16061866283416748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,0,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,128,0,1,fp8,fp8,0,0.15665599703788757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,0,0.16083733240763345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,128,0,1,fp8,fp8,0,0.15693333745002747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,0,0.16086933016777039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,0,0.16244799892107645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,4,128,0,1,fp8,fp8,0,0.15839466452598572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,0,0.1625493367513021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,0,0.16267200311024985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,8,128,0,1,fp8,fp8,0,0.15825066963831583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,0,0.16083733240763345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,0,0.0888320008913676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,128,0,1,fp8,fp8,0,0.08502399921417236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,0,0.08704533179601033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,0,0.08706133564313252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,1,128,0,1,fp8,fp8,0,0.08520000179608662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,0,0.08750933408737183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,0,0.08905599514643352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,2,128,0,1,fp8,fp8,0,0.0869813362757365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,0,0.08729599912961324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,0,0.08885866403579712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,0,0.16064533591270447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,128,0,1,fp8,fp8,0,0.08682666222254436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,0,0.08687466382980347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,0,0.08717866738637288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,8,128,0,1,fp8,fp8,0,0.08482133348782857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,0,0.05207466582457224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,0,0.052101333936055504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,32,128,0,1,fp8,fp8,0,0.05186133086681366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,0,0.0517493337392807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,0,0.052373334765434265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,1,128,0,1,fp8,fp8,0,0.050373335679372154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,0,0.05175999800364176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,0,0.05208533505598704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,2,128,0,1,fp8,fp8,0,0.049957334995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,0,0.05231999854246775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,0,0.051957334081331887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,4,128,0,1,fp8,fp8,0,0.04997866849104563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,0,0.05223466455936432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,0,0.05221333106358846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,32,8,128,0,1,fp8,fp8,0,0.050901333491007485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,128,0,1,fp8,fp8,0,0.03398400048414866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,0,0.03388266762097677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,0,0.033861334125200905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,1,128,0,1,fp8,fp8,0,0.03230399886767069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,0,0.034234667817751564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,0,0.08717333277066548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,128,0,1,fp8,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,0,0.03344533344109853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,0,0.03387733300526937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,128,0,1,fp8,fp8,0,0.03342399994532267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,0,0.03331733246644338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,0,0.03329066683848699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,8,128,0,1,fp8,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,0,0.033770665526390076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,32,128,0,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,0,0.02587199956178665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,0,0.034234667817751564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,2,128,0,1,fp8,fp8,0,0.023530667026837666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,0,0.02513066679239273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,4,128,0,1,fp8,fp8,0,0.025813333690166473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,0,0.024959998826185863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,8,128,0,1,fp8,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,32,128,0,1,fp8,fp8,0,0.01989866668979327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,128,0,1,fp8,fp8,0,0.020410666863123577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,32,1,128,0,1,fp8,fp8,0,0.025573333104451496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,2,128,0,1,fp8,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,0,0.019813333948453266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,0,0.020351999749739964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,4,128,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,0,0.021007999777793884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,8,128,0,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,32,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,1,128,0,1,fp8,fp8,0,0.017722666263580322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,2,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,4,128,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,32,8,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,32,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,1,128,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,2,128,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,0,0.01587733378012975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,4,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,32,8,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,0,0.03366400053103765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,32,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,1,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,0,0.016229332735141117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,2,128,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,4,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,32,8,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,0,0.020981334149837494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,float16,0,11.060911814371744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,fp8,0,11.294612884521484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,0,1,fp8,fp8,0,8.413157145182291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,float16,0,11.109509785970053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,0,1,fp8,fp8,0,8.478442509969076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,fp8,0,11.887232462565104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,float16,0,11.091803232828775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,fp8,0,10.88488515218099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,0,1,fp8,fp8,0,8.617269515991211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,fp8,0,11.542981465657553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,float16,0,12.010084788004557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,0,1,fp8,fp8,0,8.591338475545248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,float16,0,5.93174425760905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,0,1,fp8,fp8,0,4.493546803792317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,fp8,0,5.79209582010905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,float16,0,5.62709363301595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,fp8,0,5.973546981811523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,0,1,fp8,fp8,0,4.31277338663737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,float16,0,5.3560638427734375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,0,1,fp8,fp8,0,4.374815940856934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,fp8,0,5.771642684936523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,float16,0,5.376319885253906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,0,1,fp8,fp8,0,4.34878412882487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,fp8,0,5.418453216552734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,float16,0,5.877536137898763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,float16,0,2.7064107259114585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,fp8,0,2.9022401173909507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,fp8,0,5.640026728312175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,0,1,fp8,fp8,0,2.3450613021850586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,float16,0,2.701103846232096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,0,1,fp8,fp8,0,4.344160079956055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,fp8,0,2.8067518870035806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,0,1,fp8,fp8,0,2.2686773935953775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,float16,0,2.8644374211629233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,fp8,0,2.73583984375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,float16,0,2.636160055796305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,fp8,0,2.8003145853678384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,0,1,fp8,fp8,0,2.290714740753174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,0,1,fp8,fp8,0,2.2703466415405273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,float16,0,2.684960047403971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,float16,0,1.421237309773763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,fp8,0,2.7423038482666016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,0,1,fp8,fp8,0,1.4132374127705891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,float16,0,1.383471965789795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,0,1,fp8,fp8,0,2.2890987396240234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,fp8,0,1.4196319580078125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,float16,0,1.3885013262430828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,fp8,0,1.370357354482015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,fp8,0,1.393631935119629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,0,1,fp8,fp8,0,1.252837340037028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,fp8,0,1.4016960461934407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,0,1,fp8,fp8,0,1.2643893559773762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,0,1,fp8,fp8,0,1.2587573528289795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,float16,0,1.4016319910685222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,fp8,0,1.3922613461812336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,0,1,fp8,fp8,0,1.267738660176595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,float16,0,1.372970740000407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,float16,0,6.5873972574869795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,0,1,fp8,fp8,0,5.011498769124349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,float16,0,6.07758903503418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,fp8,0,6.457573572794597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,0,1,fp8,fp8,0,5.059967994689941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,fp8,0,6.565066655476888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,float16,0,6.331605275472005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,fp8,0,6.497392018636067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,0,1,fp8,fp8,0,5.045392036437988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,float16,0,6.8876800537109375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,float16,0,3.374080022176107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,0,1,fp8,fp8,0,5.094778696695964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,fp8,0,6.265338897705078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,fp8,0,3.221247990926107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,0,1,fp8,fp8,0,2.719301223754883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,float16,0,3.1377814610799155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,fp8,0,2.9524052937825522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,0,1,fp8,fp8,0,2.5971627235412598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,float16,0,3.1984640757242837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,fp8,0,3.0994132359822593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,float16,0,3.257802645365397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,fp8,0,3.3446292877197266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,0,1,fp8,fp8,0,2.6282827059427896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,0,1,fp8,fp8,0,2.6048107147216797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,float16,0,3.0014187494913735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,fp8,0,3.2461652755737305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,0,1,fp8,fp8,0,2.6290666262308755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,float16,0,1.753717263539632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,0,1,fp8,fp8,0,1.4462933540344238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,float16,0,1.5631945927937825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,fp8,0,1.5383893648783367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,0,1,fp8,fp8,0,1.3899466196695964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,fp8,0,1.6660106976826985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,float16,0,1.5476160049438477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,fp8,0,1.5671040217081706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,0,1,fp8,fp8,0,1.3937865893046062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,float16,0,1.5419680277506511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,fp8,0,1.5655040740966797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,0,1,fp8,fp8,0,1.3924907048543294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,float16,0,1.5448959668477376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,fp8,0,1.6078613599141438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,0,1,fp8,fp8,0,1.4057812690734863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,fp8,0,0.8817333380381266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,float16,0,0.8540000120798746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,fp8,0,0.8582133452097574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,0,1,fp8,fp8,0,0.7891626358032227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,float16,0,0.8623680273691813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,fp8,0,0.8579626878102621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,0,1,fp8,fp8,0,0.8128106594085693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,0,1,fp8,fp8,0,0.7923040390014648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,float16,0,0.8642240365346273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,fp8,0,0.8718667030334473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,float16,0,0.8629493713378906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,float16,0,0.8855466842651367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,0,1,fp8,fp8,0,0.7975467046101888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,0,1,fp8,fp8,0,0.7947733402252197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,fp8,0,0.8784266312917074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,float16,0,4.549573262532552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,fp8,0,4.5648854573567705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,0,1,fp8,fp8,0,3.6422131856282554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,float16,0,4.454506556193034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,0,1,fp8,fp8,0,3.6653013229370117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,fp8,0,4.419850667317708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,float16,0,4.518719991048177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,fp8,0,4.607450803120931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,0,1,fp8,fp8,0,3.6646931966145835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,float16,0,4.663925488789876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,fp8,0,4.246037483215332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,0,1,fp8,fp8,0,3.70798397064209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,0,1,fp8,fp8,0,2.0016640027364097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,float16,0,2.3162293434143066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,float16,0,2.1534345944722495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,fp8,0,2.108901341756185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,fp8,0,2.347263971964518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,0,1,fp8,fp8,0,1.8980426788330078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,float16,0,2.1153759956359863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,fp8,0,2.177834669748942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,float16,0,2.162090619405111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,fp8,0,2.1265759468078613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,0,1,fp8,fp8,0,1.9072906176249187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,float16,0,2.206223964691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,fp8,0,2.269200007120768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,0,1,fp8,fp8,0,1.9353386561075847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,float16,0,1.293621301651001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,fp8,0,1.175872008005778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,0,1,fp8,fp8,0,1.071781317392985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,float16,0,1.1277973651885986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,0,1,fp8,fp8,0,1.8987147013346355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,fp8,0,1.1217439969380696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,0,1,fp8,fp8,0,1.028223991394043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,float16,0,1.1278719902038574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,fp8,0,1.1405386924743652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,float16,0,1.1371253331502278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,fp8,0,1.1354506810506184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,0,1,fp8,fp8,0,1.0764373143513997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,float16,0,1.1420746644337971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,fp8,0,1.1394506295522053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,0,1,fp8,fp8,0,1.0409599939982097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,0,1,fp8,fp8,0,1.0868266423543294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,fp8,0,0.666752020517985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,float16,0,0.6468426783879598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,fp8,0,0.6453440189361572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,0,1,fp8,fp8,0,0.5917919874191284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,float16,0,0.6446666717529297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,float16,0,0.6721706390380859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,fp8,0,0.6461973190307617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,0,1,fp8,fp8,0,0.5956159830093384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,0,1,fp8,fp8,0,0.615237315495809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,float16,0,0.6478186845779419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,fp8,0,0.6501973470052084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,0,1,fp8,fp8,0,0.5990933179855347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,float16,0,0.6694773038228353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,fp8,0,0.6571573416392008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,0,1,fp8,fp8,0,0.6010239919026693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,0,1,fp8,fp8,0,4.896496136983235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,fp8,0,6.247605641682942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,float16,0,6.123829523722331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,float16,0,6.254133224487305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,fp8,0,5.970959981282552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,0,1,fp8,fp8,0,4.894970575968425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,float16,0,6.1733442942301435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,fp8,0,6.303637186686198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,0,1,fp8,fp8,0,4.920789400736491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,float16,0,6.243770599365234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,float16,0,3.0446561177571616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,fp8,0,6.18397839864095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,fp8,0,2.972890535990397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,0,1,fp8,fp8,0,2.643359978993734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,0,1,fp8,fp8,0,4.960629463195801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,float16,0,2.789589246114095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,0,1,fp8,fp8,0,2.4798080126444497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,float16,0,2.8646453221639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,fp8,0,2.9157387415568032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,0,1,fp8,fp8,0,2.48746665318807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,fp8,0,2.9711360931396484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,fp8,0,2.922778765360514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,0,1,fp8,fp8,0,2.5037280718485513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,float16,0,2.9552106857299805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,float16,0,2.9444265365600586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,fp8,0,2.9699039459228516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,float16,0,1.531493345896403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,fp8,0,1.519312063852946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,0,1,fp8,fp8,0,1.404655933380127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,float16,0,1.439893404642741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,fp8,0,1.528656005859375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,0,1,fp8,fp8,0,1.304085334142049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,float16,0,1.4349385897318523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,fp8,0,1.4435733159383137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,0,1,fp8,fp8,0,1.309381326039632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,float16,0,1.4522560437520344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,fp8,0,1.4484160741170247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,0,1,fp8,fp8,0,1.3133653004964192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,float16,0,1.4569066365559895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,fp8,0,1.509717305501302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,0,1,fp8,fp8,0,1.3240373134613037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,float16,0,0.8175626595815023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,fp8,0,0.8834613164265951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,0,1,fp8,fp8,0,0.75709335009257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,float16,0,0.7821173667907715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,fp8,0,0.7821706930796305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,0,1,fp8,fp8,0,2.5291946729024253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,0,1,fp8,fp8,0,0.7185280323028564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,float16,0,0.7846346696217855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,fp8,0,0.7937973340352377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,0,1,fp8,fp8,0,0.7204906940460205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,float16,0,0.7900213400522867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,fp8,0,0.7900213400522867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,0,1,fp8,fp8,0,0.7246826489766439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,float16,0,0.8202826976776123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,fp8,0,0.7974879741668701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,float16,0,0.47516798973083496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,fp8,0,0.5093546708424886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,0,1,fp8,fp8,0,0.4835946559906006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,float16,0,0.4552853504816691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,fp8,0,0.49188800652821857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,0,1,fp8,fp8,0,0.42499732971191406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,float16,0,0.454645315806071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,fp8,0,0.4652479887008667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,0,1,fp8,fp8,0,0.426202654838562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,float16,0,0.4631040096282959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,fp8,0,0.4639413356781006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,0,1,fp8,fp8,0,0.42683732509613037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,float16,0,0.46650667985280353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,fp8,0,0.4703413248062134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,0,1,fp8,fp8,0,0.7582826614379883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,0,1,fp8,fp8,0,0.4307039976119995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,0,1,fp8,fp8,0,3.000213305155436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,float16,0,3.5187625885009766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,fp8,0,3.6518774032592773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,0,1,fp8,fp8,0,3.017903963724772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,float16,0,3.4343039194742837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,float16,0,3.5495627721150718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,fp8,0,3.540752092997233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,0,1,fp8,fp8,0,3.044133186340332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,float16,0,3.6825440724690757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,float16,0,1.8827039400736492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,fp8,0,3.6748374303181968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,fp8,0,1.8719627062479656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,fp8,0,3.537407875061035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,0,1,fp8,fp8,0,3.067866643269857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,float16,0,1.7672106424967449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,fp8,0,1.7106399536132812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,0,1,fp8,fp8,0,1.6622026761372883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,float16,0,1.7257332801818848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,fp8,0,1.7463733355204265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,float16,0,1.7615092595418294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,0,1,fp8,fp8,0,1.5426506996154785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,fp8,0,1.7700799306233723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,0,1,fp8,fp8,0,1.557637373606364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,float16,0,1.7119359970092773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,fp8,0,1.7994613647460938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,0,1,fp8,fp8,0,1.57477871576945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,float16,0,0.9433226585388184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,fp8,0,1.0496586958567302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,0,1,fp8,fp8,0,0.8787946701049805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,float16,0,0.8951306343078613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,fp8,0,0.9022186597188314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,0,1,fp8,fp8,0,1.5476694107055664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,float16,0,0.8987680276234945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,fp8,0,0.9069333076477051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,0,1,fp8,fp8,0,0.8234720230102539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,float16,0,0.9025066693623861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,fp8,0,0.9087200164794922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,0,1,fp8,fp8,0,0.8297866980234782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,float16,0,0.9116693337758383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,fp8,0,0.9189120133717855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,0,1,fp8,fp8,0,0.8389973640441895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,float16,0,0.5270453294118246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,fp8,0,0.5333173274993896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,0,1,fp8,fp8,0,0.4907093445460002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,float16,0,0.4979999860127767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,fp8,0,0.4994560082753499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,0,1,fp8,fp8,0,0.46190400918324787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,0,1,fp8,fp8,0,0.8203093210856119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,fp8,0,0.5001546541849772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,0,1,fp8,fp8,0,0.4636586507161458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,float16,0,0.5052586793899536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,fp8,0,0.5158079862594604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,0,1,fp8,fp8,0,0.4660213390986125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,float16,0,0.5154560009638468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,fp8,0,0.5128159920374552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,0,1,fp8,fp8,0,0.47286399205525714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,fp8,0,0.3229493300120036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,0,1,fp8,fp8,0,0.296234667301178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,float16,0,0.30300267537434894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,fp8,0,0.3034506638844808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,0,1,fp8,fp8,0,0.27852799495061237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,float16,0,0.3059626619021098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,fp8,0,0.302511990070343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,0,1,fp8,fp8,0,0.2790293296178182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,float16,0,0.30290132761001587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,fp8,0,0.309168001015981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,float16,0,0.3212266763051351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,0,1,fp8,fp8,0,0.2826773325602214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,float16,0,0.31038933992385864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,fp8,0,0.3118720054626465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,float16,0,0.5006986856460571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,float16,0,3.493093490600586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,fp8,0,3.6026827494303384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,0,1,fp8,fp8,0,3.082634607950846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,0,1,fp8,fp8,0,0.28677332401275635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,float16,0,3.613642692565918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,fp8,0,3.568549474080404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,0,1,fp8,fp8,0,3.1051146189371743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,float16,0,3.4863465627034507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,fp8,0,3.521402676900228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,float16,0,3.7045653661092124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,fp8,0,3.513279914855957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,0,1,fp8,fp8,0,3.1705118815104165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,float16,0,1.8634986877441406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,fp8,0,1.8754666646321614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,float16,0,1.7030240694681804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,0,1,fp8,fp8,0,3.129232088724772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,fp8,0,1.7107359568277996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,0,1,fp8,fp8,0,1.556597391764323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,float16,0,1.7157813707987468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,fp8,0,1.7193172772725422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,0,1,fp8,fp8,0,1.5672213236490886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,float16,0,1.725327968597412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,fp8,0,1.729365348815918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,0,1,fp8,fp8,0,1.578447977701823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,0,1,fp8,fp8,0,1.710602601369222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,float16,0,1.7513386408487956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,fp8,0,1.7625013987223308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,float16,0,0.9780480066935221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,0,1,fp8,fp8,0,1.602981408437093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,fp8,0,1.0014080206553142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,0,1,fp8,fp8,0,0.9451413154602051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,float16,0,0.901909351348877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,fp8,0,0.8971146742502848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,0,1,fp8,fp8,0,0.8161280155181885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,float16,0,0.8952266375223795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,fp8,0,0.9058132966359457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,0,1,fp8,fp8,0,0.8187039693196615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,float16,0,0.907696008682251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,fp8,0,0.9093120098114014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,float16,0,0.9103253682454427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,fp8,0,0.9191359678904215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,0,1,fp8,fp8,0,0.8383893171946207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,fp8,0,0.5251146554946899
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,0,1,fp8,fp8,0,0.4818506638209025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,float16,0,0.48366399606068927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,fp8,0,0.48204267024993896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,0,1,fp8,fp8,0,0.4463306665420532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,0,1,fp8,fp8,0,0.8235092957814535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,fp8,0,0.48868799209594727
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,0,1,fp8,fp8,0,0.4469386736551921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,float16,0,0.5158400138219198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,fp8,0,0.4997599919637044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,0,1,fp8,fp8,0,0.4499359925587972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,float16,0,0.5089120070139567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,fp8,0,0.5001813173294067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,0,1,fp8,fp8,0,0.4576053222020467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,float16,0,0.2977386713027954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,fp8,0,0.30397866169611615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,0,1,fp8,fp8,0,0.2802933255831401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,float16,0,0.28590933481852215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,fp8,0,0.27742934226989746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,0,1,fp8,fp8,0,0.258842666943868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,float16,0,0.2800106604894002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,fp8,0,0.28379199902216595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,0,1,fp8,fp8,0,0.259717325369517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,float16,0,0.2879093289375305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,fp8,0,0.28624532620112103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,float16,0,0.4883466561635335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,float16,0,0.28653866052627563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,float16,0,0.48951999346415204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,0,1,fp8,fp8,0,0.2672746578852336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,float16,0,0.187008003393809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,fp8,0,0.1894879937171936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,0,1,fp8,fp8,0,0.26387200752894086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,float16,0,0.18077333768208823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,fp8,0,0.18058133125305176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,0,1,fp8,fp8,0,0.16564266880353293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,float16,0,0.18013866742451987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,fp8,0,0.17977599302927652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,0,1,fp8,fp8,0,0.1658399999141693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,0,1,fp8,fp8,0,0.17723733186721802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,float16,0,0.1791306734085083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,fp8,0,0.1804693341255188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,float16,0,0.18268799781799316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,fp8,0,0.17999466260274252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,0,1,fp8,fp8,0,0.16663466890652975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,float16,0,2.1715680758158364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,fp8,0,2.170442740122477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,fp8,0,0.28684266408284503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,0,1,fp8,fp8,0,1.972981293996175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,float16,0,2.218752066294352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,fp8,0,2.155072053273519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,0,1,fp8,fp8,0,0.1665333310763041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,0,1,fp8,fp8,0,1.9876373608907063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,float16,0,2.205344041188558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,fp8,0,2.187509377797445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,0,1,fp8,fp8,0,2.0094879468282065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,float16,0,2.271872043609619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,float16,0,1.206447998682658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,fp8,0,2.251669406890869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,0,1,fp8,fp8,0,2.0471040407816568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,fp8,0,1.3007413546244304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,0,1,fp8,fp8,0,1.1231733163197835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,float16,0,1.1216213703155518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,fp8,0,1.103765328725179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,0,1,fp8,fp8,0,1.0066293080647786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,float16,0,1.1081493695576985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,fp8,0,1.1334506670633953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,0,1,fp8,fp8,0,1.0119413534800212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,float16,0,1.119050661722819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,fp8,0,1.122058629989624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,float16,0,1.128661314646403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,fp8,0,1.1359626452128093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,0,1,fp8,fp8,0,1.0427947044372559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,fp8,0,0.6514666477839152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,0,1,fp8,fp8,0,0.5926880041758219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,float16,0,0.5879253149032593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,fp8,0,0.5847253402074178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,0,1,fp8,fp8,0,0.5339413483937582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,float16,0,0.5851200024286906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,float16,0,0.6346133152643839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,0,1,fp8,fp8,0,0.5382346709569296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,float16,0,0.5916320085525513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,fp8,0,0.5930293401082357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,0,1,fp8,fp8,0,0.5423413515090942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,0,1,fp8,fp8,0,1.020965337753296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,fp8,0,0.6007200082143148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,0,1,fp8,fp8,0,0.5512319803237915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,float16,0,0.34890135129292804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,fp8,0,0.36161065101623535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,0,1,fp8,fp8,0,0.326474666595459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,float16,0,0.32314133644104004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,fp8,0,0.32150399684906006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,0,1,fp8,fp8,0,0.29815999666849774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,float16,0,0.32145599524180096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,fp8,0,0.3277546763420105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,0,1,fp8,fp8,0,0.29793065786361694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,float16,0,0.3267680009206136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,fp8,0,0.326255997021993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,0,1,fp8,fp8,0,0.30128000179926556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,float16,0,0.3294559915860494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,fp8,0,0.341541330019633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,0,1,fp8,fp8,0,0.30619200070699054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,float16,0,0.21176532904307047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,fp8,0,0.2099413275718689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,0,1,fp8,fp8,0,0.1950506567955017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,float16,0,0.18666134277979532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,fp8,0,0.19040000438690186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,float16,0,0.5999413331349691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,0,1,fp8,fp8,0,0.17299199104309082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,float16,0,0.19041067361831665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,fp8,0,0.18574400742848715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,0,1,fp8,fp8,0,0.17308266957600912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,float16,0,0.192138671875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,fp8,0,0.18535999457041422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,0,1,fp8,fp8,0,0.1777013341585795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,float16,0,0.1981173356374105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,fp8,0,0.19532267252604166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,0,1,fp8,fp8,0,0.18318400780359903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,float16,0,0.13170133034388223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,fp8,0,0.13239999612172446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,0,1,fp8,fp8,0,0.12763733665148416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,float16,0,0.12661866346995035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,fp8,0,0.1256480018297831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,0,1,fp8,fp8,0,0.11770666639010112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,float16,0,0.12657599647839865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,fp8,0,0.12563199798266092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,0,1,fp8,fp8,0,0.11797333757082622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,float16,0,0.1269599994023641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,fp8,0,0.12587733070055643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,0,1,fp8,fp8,0,0.11995733777681987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,float16,0,0.12569600343704224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,fp8,0,0.1276533305644989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,fp8,0,0.5886346499125162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,float16,0,2.415232022603353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,fp8,0,2.440469264984131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,0,1,fp8,fp8,0,0.11960533261299133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,0,1,fp8,fp8,0,2.174586613972982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,float16,0,2.384602705637614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,fp8,0,2.395088036855062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,0,1,fp8,fp8,0,2.1931947072347007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,float16,0,2.4618132909139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,fp8,0,2.4741493860880532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,0,1,fp8,fp8,0,2.2514826456705728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,float16,0,2.5162293116251626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,fp8,0,2.4851892789204917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,fp8,0,1.417301336924235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,0,1,fp8,fp8,0,1.2900799910227458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,float16,0,1.1965280373891194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,float16,0,1.3347466786702473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,fp8,0,1.1962719758351643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,0,1,fp8,fp8,0,2.2598986625671387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,0,1,fp8,fp8,0,1.091754674911499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,float16,0,1.1970240275065105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,fp8,0,1.2101759910583496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,0,1,fp8,fp8,0,1.1027039686838787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,float16,0,1.2194080352783203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,fp8,0,1.218549331029256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,0,1,fp8,fp8,0,1.1153600215911865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,float16,0,1.2333386739095051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,fp8,0,1.2505333423614502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,float16,0,0.6885920365651449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,fp8,0,0.6990293661753336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,0,1,fp8,fp8,0,0.6458133459091187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,float16,0,0.6175093253453573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,fp8,0,0.6212906837463379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,0,1,fp8,fp8,0,0.5687573353449503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,float16,0,0.6231253147125244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,fp8,0,0.6271093289057413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,0,1,fp8,fp8,0,0.5723359982172648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,float16,0,0.628437320391337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,fp8,0,0.6321066617965698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,0,1,fp8,fp8,0,1.1415680249532063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,float16,0,0.6404213507970175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,fp8,0,0.6467626492182413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,0,1,fp8,fp8,0,0.5915360053380331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,fp8,0,0.37806399663289386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,float16,0,0.33416001001993817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,fp8,0,0.3349813222885132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,0,1,fp8,fp8,0,0.5790239969889323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,0,1,fp8,fp8,0,0.3084160089492798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,float16,0,0.3417653242746989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,fp8,0,0.33663467566172284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,float16,0,0.3702666759490967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,0,1,fp8,fp8,0,0.3102346658706665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,float16,0,0.33876800537109375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,0,1,fp8,fp8,0,0.3470613161722819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,0,1,fp8,fp8,0,0.31486932436625165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,float16,0,0.3444480101267497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,fp8,0,0.3477333386739095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,0,1,fp8,fp8,0,0.3200426697731018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,fp8,0,0.21179733673731485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,0,1,fp8,fp8,0,0.1962826649347941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,float16,0,0.18392000595728555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,fp8,0,0.18893333276112875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,0,1,fp8,fp8,0,0.17505067586898804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,float16,0,0.18873600165049234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,fp8,0,0.18780267238616943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,0,1,fp8,fp8,0,0.17700799306233725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,float16,0,0.19003732999165854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,fp8,0,0.19458132982254028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,fp8,0,0.3428586721420288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,0,1,fp8,fp8,0,0.17909334103266397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,float16,0,0.20136533180872598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,fp8,0,0.1967946688334147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,0,1,fp8,fp8,0,0.18196799357732138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,float16,0,0.20907733837763467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,0,1,fp8,fp8,0,0.11966933806737264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,float16,0,0.11595199505488078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,fp8,0,0.11355732878049214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,0,1,fp8,fp8,0,0.10763733585675557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,float16,0,0.1125333309173584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,fp8,0,0.11466667056083679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,0,1,fp8,fp8,0,0.10541866223017375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,float16,0,0.11364799737930298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,fp8,0,0.1157973309357961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,float16,0,0.11565867066383362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,fp8,0,0.11753066380818684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,float16,0,0.12602667013804117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,0,1,fp8,fp8,0,0.10973866780598958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,float16,0,0.08476266264915466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,fp8,0,0.08481599887212117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,0,1,fp8,fp8,0,0.0807360013326009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,float16,0,0.08333866794904073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,fp8,0,0.08389866352081299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,0,1,fp8,fp8,0,0.07677866518497467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,float16,0,0.08197866876920064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,fp8,0,0.08252266546090443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,0,1,fp8,fp8,0,0.07648000121116638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,float16,0,0.0816480020682017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,fp8,0,0.0827946662902832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,0,1,fp8,fp8,0,0.07668266693751018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,float16,0,0.08295999964078267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,fp8,0,0.08317333459854126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,0,1,fp8,fp8,0,0.07878399888674419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,0,1,fp8,fp8,0,0.105621337890625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,float16,0,1.621664047241211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,fp8,0,0.12788800398508707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,fp8,0,1.6262720425923665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,0,1,fp8,fp8,0,1.4772426287333171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,float16,0,1.6328907012939453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,fp8,0,1.6356479326883953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,0,1,fp8,fp8,0,1.4929653803507488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,float16,0,1.686031977335612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,fp8,0,1.680997371673584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,0,1,fp8,fp8,0,1.5345600446065266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,float16,0,1.673850695292155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,fp8,0,1.6876586278279622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,float16,0,0.9211626847585043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,fp8,0,0.9292799631754557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,0,1,fp8,fp8,0,0.8582932949066162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,fp8,0,0.8135733604431152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,0,1,fp8,fp8,0,0.744815985361735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,float16,0,0.815125306447347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,fp8,0,0.8273813724517822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,0,1,fp8,fp8,0,0.751418670018514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,float16,0,0.829200029373169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,float16,0,0.810090700785319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,fp8,0,0.8342186609903971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,0,1,fp8,fp8,0,0.7620746294657389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,float16,0,0.8396373589833578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,fp8,0,0.8496853510538737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,float16,0,0.4793013334274292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,fp8,0,0.4856479962666829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,0,1,fp8,fp8,0,0.44866665204366046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,float16,0,0.43057068188985187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,fp8,0,0.4291573365529378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,0,1,fp8,fp8,0,0.3919839859008789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,float16,0,0.425872008005778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,fp8,0,0.42948798338572186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,float16,0,0.43182400862375897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,fp8,0,0.4355253378550212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,0,1,fp8,fp8,0,0.39904534816741943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,float16,0,0.44014934698740643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,0,1,fp8,fp8,0,0.7801226774851481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,fp8,0,0.4461386601130168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,0,1,fp8,fp8,0,0.4095093409220378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,float16,0,0.2592853307723999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,fp8,0,0.2697920004526774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,float16,0,0.23336533705393472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,fp8,0,0.23120532433191934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,0,1,fp8,fp8,0,0.21478400627772012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,0,1,fp8,fp8,0,1.5387360254923503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,fp8,0,0.2340373396873474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,0,1,fp8,fp8,0,0.21570666631062826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,0,1,fp8,fp8,0,0.24537599086761475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,float16,0,0.2373866637547811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,0,1,fp8,fp8,0,0.21998933951059976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,float16,0,0.24254933993021646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,float16,0,0.2341066598892212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,0,1,fp8,fp8,0,0.22476265827814737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,float16,0,0.15085867047309875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,fp8,0,0.154448002576828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,fp8,0,0.2365866700808207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,float16,0,0.12731732924779257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,fp8,0,0.12762666742006937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,0,1,fp8,fp8,0,0.11831999818483989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,0,1,fp8,fp8,0,0.39483733971913654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,float16,0,0.12826133767763773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,fp8,0,0.12905599673589072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,0,1,fp8,fp8,0,0.1218346655368805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,float16,0,0.12973866860071817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,fp8,0,0.13199466466903687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,0,1,fp8,fp8,0,0.12435199817021687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,float16,0,0.13563199838002524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,fp8,0,0.13578133781750998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,0,1,fp8,fp8,0,0.12994666894276938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,float16,0,0.08960533142089844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,fp8,0,0.09066133697827657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,0,1,fp8,fp8,0,0.08932800094286601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,float16,0,0.08309333523114522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,fp8,0,0.08330133557319641
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,0,1,fp8,fp8,0,0.07840533554553986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,fp8,0,0.24416534105936685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,fp8,0,0.08477866649627686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,0,1,fp8,fp8,0,0.07833066582679749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,float16,0,0.0846720039844513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,fp8,0,0.08473599950472514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,0,1,fp8,fp8,0,0.07912533481915791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,float16,0,0.08513599634170532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,fp8,0,0.08507200082143147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,0,1,fp8,fp8,0,0.08088533580303192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,float16,0,0.05789866546789805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,0,1,fp8,fp8,0,0.05780800183614095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,float16,0,0.05827199916044871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,fp8,0,0.058575997749964394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,0,1,fp8,fp8,0,0.05403199791908264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,float16,0,0.05633600056171417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,float16,0,0.08306133250395457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,fp8,0,0.0565280020236969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,0,1,fp8,fp8,0,0.056133334835370384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,float16,0,0.056234667698542275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,fp8,0,0.05798399945100149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,0,1,fp8,fp8,0,0.055829331278800964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,float16,0,0.058378666639328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,fp8,0,0.05816533168156942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,0,1,fp8,fp8,0,0.056373332937558494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,0,1,fp8,fp8,0,0.14231466253598532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,float16,0,1.6786079406738281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,fp8,0,1.6753333409627278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,0,1,fp8,fp8,0,1.6543839772542317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,float16,0,1.6957813898722331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,fp8,0,1.6948533058166504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,0,1,fp8,fp8,0,1.768821398417155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,float16,0,1.7184319496154785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,0,1,fp8,fp8,0,1.730954647064209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,float16,0,1.828229268391927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,fp8,0,1.8225599924723308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,0,1,fp8,fp8,0,1.816864013671875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,fp8,0,1.7194933891296387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,0,1,fp8,fp8,0,0.9889280001322428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,float16,0,0.8505067030588785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,fp8,0,0.8572479883829752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,float16,0,0.9869546890258789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,0,1,fp8,fp8,0,0.8398186365763346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,float16,0,0.8673813343048096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,fp8,0,0.9745386441548666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,0,1,fp8,fp8,0,0.8477706909179688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,float16,0,0.8819680213928223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,fp8,0,0.8760213057200114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,0,1,fp8,fp8,0,0.8626080354054769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,float16,0,0.9084053039550781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,fp8,0,0.8964373270670573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,float16,0,0.5131306648254395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,fp8,0,0.5013759930928549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,0,1,fp8,fp8,0,0.5091840028762817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,float16,0,0.4386826753616333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,fp8,0,0.4410133361816406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,0,1,fp8,fp8,0,0.43110934893290204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,float16,0,0.44793065388997394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,fp8,0,0.4433279832204183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,fp8,0,0.8663840293884277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,float16,0,0.45131198565165204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,fp8,0,0.449834664662679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,0,1,fp8,fp8,0,0.44330132007598877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,float16,0,0.4625226656595866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,fp8,0,0.4602400064468384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,0,1,fp8,fp8,0,0.45633598168691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,0,1,fp8,fp8,0,0.4349173307418823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,fp8,0,0.265066663424174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,0,1,fp8,fp8,0,0.2675039966901143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,float16,0,0.23507199684778848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,fp8,0,0.23519466320673624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,0,1,fp8,fp8,0,0.22219733397165933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,float16,0,0.23875733216603598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,fp8,0,0.2360853354136149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,0,1,fp8,fp8,0,0.23004267613093057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,float16,0,0.2385653257369995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,fp8,0,0.24111467599868774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,float16,0,0.27318400144577026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,float16,0,0.24782933791478476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,fp8,0,0.24596800406773886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,0,1,fp8,fp8,0,0.2409813404083252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,fp8,0,0.15001066525777182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,0,1,fp8,fp8,0,0.14831999937693277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,float16,0,0.13006933530171713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,fp8,0,0.12967466314633688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,0,1,fp8,fp8,0,0.12154666582743327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,float16,0,0.12916800379753113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,fp8,0,0.13078932960828146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,float16,0,0.1506239970525106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,0,1,fp8,fp8,0,0.12637333075205484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,float16,0,0.13160000244776407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,fp8,0,0.13242133458455405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,0,1,fp8,fp8,0,0.12993066509564719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,float16,0,0.1387999951839447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,fp8,0,0.13527466853459677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,0,1,fp8,fp8,0,0.13546133041381836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,fp8,0,0.08695466319719951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,0,1,fp8,fp8,0,0.2323360045750936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,0,1,fp8,fp8,0,0.08913600444793701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,0,1,fp8,fp8,0,0.9082612991333008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,fp8,0,0.07734933495521545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,0,1,fp8,fp8,0,0.07123200098673503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,float16,0,0.07745066781838734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,fp8,0,0.07699200014273326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,0,1,fp8,fp8,0,0.07283199826876323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,float16,0,0.07685333490371704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,fp8,0,0.07788800199826558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,0,1,fp8,fp8,0,0.07152000069618225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,float16,0,0.07798933486143748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,fp8,0,0.0784800002972285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,0,1,fp8,fp8,0,0.07666133344173431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,float16,0,0.0544106662273407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,fp8,0,0.0540533314148585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,0,1,fp8,fp8,0,0.0518506666024526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,float16,0,0.053264002005259194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,float16,0,0.07669333120187123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,fp8,0,0.053599998354911804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,0,1,fp8,fp8,0,0.04808533191680908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,float16,0,0.05226666728655497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,fp8,0,0.05213866631189982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,0,1,fp8,fp8,0,0.04836800197760264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,float16,0,0.052202666799227394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,fp8,0,0.053743998209635414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,0,1,fp8,fp8,0,0.048351998130480446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,float16,0,0.05388266841570536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,fp8,0,0.05425600210825602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,0,1,fp8,fp8,0,0.04995200037956238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,float16,0,0.03758399933576584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,0,1,fp8,fp8,0,0.036389333506425224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,float16,0,0.03569599986076355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,0,1,fp8,fp8,0,0.03446933378775915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,float16,0,0.035360001027584076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,fp8,0,0.03736533224582672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,0,1,fp8,fp8,0,0.033813332517941795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,float16,0,0.03766933331886927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,fp8,0,0.03605333218971888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,0,1,fp8,fp8,0,0.035743998984495796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,float16,0,0.03755733370780945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,fp8,0,0.037290667494138084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,0,1,fp8,fp8,0,0.036015999813874565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,float16,0,0.08891200025876363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,0,1,float16,float16,0,1.4232746760050456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,0,1,float16,fp8,0,1.4184853235880535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,0,1,fp8,fp8,0,1.4178133010864258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,0,1,float16,float16,0,1.47869873046875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,0,1,fp8,fp8,0,1.544314702351888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,0,1,float16,float16,0,1.471834659576416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,0,1,float16,fp8,0,1.487061341603597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,0,1,fp8,fp8,0,1.5667519569396973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,0,1,float16,float16,0,1.5814560254414876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,0,1,float16,fp8,0,1.4873332977294922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,0,1,float16,fp8,0,1.5638079643249512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,0,1,float16,float16,0,0.8576906522115072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,0,1,fp8,fp8,0,1.589461326599121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,0,1,float16,fp8,0,0.8434186776479086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,0,1,fp8,fp8,0,0.8711733023325602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,0,1,float16,fp8,0,0.7233599821726481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,0,1,fp8,fp8,0,0.7214399973551432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,0,1,float16,float16,0,0.7345120112101237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,0,1,float16,fp8,0,0.7368853092193604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,0,1,fp8,fp8,0,0.730842669804891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,0,1,float16,float16,0,0.7471093336741129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,0,1,float16,fp8,0,0.7440106868743896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,0,1,fp8,fp8,0,0.7446880340576172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,0,1,float16,float16,0,0.722815990447998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,0,1,float16,fp8,0,0.7663733164469401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,0,1,fp8,fp8,0,0.7953173319498698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,0,1,float16,float16,0,0.4578453302383423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,0,1,float16,fp8,0,0.4308053255081177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,0,1,fp8,fp8,0,0.4465706745783488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,0,1,float16,float16,0,0.37269866466522217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,0,1,float16,fp8,0,0.37274666627248126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,0,1,fp8,fp8,0,0.37037865320841473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,0,1,float16,float16,0,0.37985066572825116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,0,1,float16,fp8,0,0.379802664120992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,0,1,fp8,fp8,0,0.37373868624369305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,0,1,float16,float16,0,0.3842506806055705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,0,1,float16,fp8,0,0.38496001561482746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,0,1,fp8,fp8,0,0.3833386500676473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,0,1,float16,float16,0,0.39514132340749103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,0,1,float16,fp8,0,0.39345065752665204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,0,1,fp8,fp8,0,0.3963786760965983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,0,1,float16,fp8,0,0.23034665981928507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,0,1,fp8,fp8,0,0.2353066603342692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,0,1,float16,float16,0,0.1982133388519287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,0,1,float16,fp8,0,0.19872534275054932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,0,1,fp8,fp8,0,0.1908373236656189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,0,1,float16,float16,0,0.2025279998779297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,0,1,float16,fp8,0,0.20055999358495077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,0,1,fp8,fp8,0,0.1984000007311503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,0,1,float16,float16,0,0.20432533820470175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,0,1,float16,fp8,0,0.20523732900619507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,0,1,fp8,fp8,0,0.19988799095153809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,0,1,float16,float16,0,0.21299733718236288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,0,1,float16,float16,0,0.23518399397532144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,0,1,fp8,fp8,0,0.20993600289026895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,0,1,float16,fp8,0,0.12600533167521158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,0,1,fp8,fp8,0,0.1297920048236847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,0,1,float16,float16,0,0.10726933677991231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,0,1,float16,fp8,0,0.10897599657376607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,0,1,float16,float16,0,0.7859679857889811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,0,1,float16,float16,0,0.10954667131106059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,0,1,float16,fp8,0,0.10967999696731567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,0,1,float16,float16,0,0.13075199723243713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,0,1,float16,float16,0,0.11155733466148376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,0,1,float16,fp8,0,0.1106666624546051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,0,1,fp8,fp8,0,0.11012799541155498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,0,1,float16,float16,0,0.11565333604812622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,0,1,fp8,fp8,0,0.10424533486366272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,0,1,fp8,fp8,0,0.11637333035469055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,0,1,float16,float16,0,0.07610133290290833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,0,1,float16,fp8,0,0.07346133391062419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,0,1,float16,fp8,0,0.11514133214950562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,0,1,fp8,fp8,0,0.07855466504891713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,0,1,float16,fp8,0,0.21070400873819986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,0,1,fp8,fp8,0,0.10610666871070862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,0,1,fp8,fp8,0,0.06017066538333893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,0,1,float16,float16,0,0.06594133377075195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,0,1,float16,fp8,0,0.06570133566856384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,0,1,fp8,fp8,0,0.060602664947509766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,0,1,float16,float16,0,0.06450133522351582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,0,1,float16,fp8,0,0.06520000100135803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,0,1,fp8,fp8,0,0.060085331400235496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,0,1,float16,float16,0,0.06657066444555919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,0,1,float16,fp8,0,0.06684799989064534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,0,1,fp8,fp8,0,0.06460799773534139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,0,1,float16,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,0,1,fp8,fp8,0,0.04584533472855886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,0,1,float16,float16,0,0.04422399898370107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,0,1,float16,fp8,0,0.044106667240460716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,0,1,float16,float16,0,0.06530133386452992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,0,1,float16,float16,0,0.04428799947102865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,0,1,float16,fp8,0,0.04483200112978617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,0,1,fp8,fp8,0,0.04218666752179464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,0,1,float16,fp8,0,0.04553600152333578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,0,1,fp8,fp8,0,0.041989331444104515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,0,1,float16,float16,0,0.04576000074545542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,0,1,float16,fp8,0,0.04456533491611481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,0,1,fp8,fp8,0,0.04206933577855428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,0,1,float16,float16,0,0.03149333347876867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,0,1,float16,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,0,1,fp8,fp8,0,0.03194666653871536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,0,1,float16,float16,0,0.04452799757321676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,0,1,float16,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,0,1,fp8,fp8,0,0.028783999383449554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,0,1,float16,float16,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,0,1,float16,fp8,0,0.029845332105954487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,0,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,0,1,float16,float16,0,0.031199999153614044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,0,1,float16,fp8,0,0.029802667597929638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,0,1,float16,fp8,0,0.0647626668214798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,0,1,float16,float16,0,0.031386665999889374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,0,1,float16,float16,0,0.04566933214664459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,0,1,float16,float16,0,0.02920000006755193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,0,1,float16,fp8,0,0.029946667452653248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,0,1,fp8,fp8,0,0.027045334378878277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,0,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,0,1,fp8,fp8,0,0.025583999852339428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,0,1,float16,float16,0,0.02754666656255722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,0,1,float16,float16,0,0.031311998764673867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,0,1,float16,fp8,0,0.030933332939942677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,0,1,fp8,fp8,0,0.030576000610987347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,0,1,float16,fp8,0,0.028938665986061096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,0,1,fp8,fp8,0,0.02762666592995326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,0,1,float16,fp8,0,0.029264000554879505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,0,1,fp8,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,0,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,0,1,float16,float16,0,0.6453706820805868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,0,1,float16,float16,0,0.027221334477265675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,0,1,float16,fp8,0,0.6452000141143799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,0,1,fp8,fp8,0,0.6540480057398478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,0,1,float16,float16,0,0.6549493471781412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,0,1,float16,fp8,0,0.6560800075531006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,0,1,fp8,fp8,0,0.6625920136769613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,0,1,float16,float16,0,0.667242685953776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,0,1,float16,fp8,0,0.667418638865153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,0,1,fp8,fp8,0,0.6749386787414551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,0,1,float16,float16,0,0.6961812973022461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,0,1,float16,fp8,0,0.6870240370432535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,0,1,fp8,fp8,0,0.7393386363983154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,0,1,float16,fp8,0,0.3932480017344157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,0,1,fp8,fp8,0,0.41418135166168213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,0,1,float16,float16,0,0.33063467343648273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,0,1,float16,fp8,0,0.34276266892751056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,0,1,fp8,fp8,0,0.025818665822347004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,0,1,float16,float16,0,0.3378346761067708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,0,1,float16,fp8,0,0.33643198013305664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,0,1,fp8,fp8,0,0.34276266892751056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,0,1,float16,float16,0,0.40486399332682294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,0,1,float16,fp8,0,0.3433706760406494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,0,1,fp8,fp8,0,0.34751466910044354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,0,1,float16,float16,0,0.3543200095494588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,0,1,float16,fp8,0,0.3536906639734904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,0,1,fp8,fp8,0,0.3625919818878174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,0,1,float16,float16,0,0.21665600935618082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,0,1,float16,fp8,0,0.2131040096282959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,0,1,fp8,fp8,0,0.22036266326904297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,0,1,float16,float16,0,0.17946134010950723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,0,1,float16,fp8,0,0.17829867204030356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,0,1,float16,float16,0,0.3445226748784383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,0,1,float16,float16,0,0.18085867166519165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,0,1,float16,fp8,0,0.17933332920074463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,0,1,fp8,fp8,0,0.18068800369898477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,0,1,float16,float16,0,0.18266665935516357
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,0,1,float16,fp8,0,0.18244266510009766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,0,1,fp8,fp8,0,0.18239466349283853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,0,1,fp8,fp8,0,0.3362826506296794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,0,1,float16,fp8,0,0.1871839960416158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,0,1,fp8,fp8,0,0.19090133905410767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,0,1,float16,float16,0,0.11870933572451274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,0,1,float16,fp8,0,0.11763200163841248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,0,1,fp8,fp8,0,0.1218933363755544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,0,1,float16,float16,0,0.09897599617640178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,0,1,float16,fp8,0,0.0992693305015564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,0,1,float16,float16,0,0.19056000312169394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,0,1,fp8,fp8,0,0.09347200393676758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,0,1,float16,float16,0,0.1009173293908437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,0,1,float16,fp8,0,0.0992746651172638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,0,1,fp8,fp8,0,0.09920533498128255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,0,1,float16,float16,0,0.10172800223032634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,0,1,float16,fp8,0,0.10149332880973816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,0,1,fp8,fp8,0,0.10115733742713928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,0,1,float16,float16,0,0.10556800166765849
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,0,1,float16,fp8,0,0.10471999645233154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,0,1,fp8,fp8,0,0.10744000474611919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,0,1,float16,float16,0,0.067071999112765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,0,1,float16,fp8,0,0.06587199866771698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,0,1,fp8,fp8,0,0.07157866656780243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,0,1,float16,float16,0,0.058431997895240784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,0,1,float16,fp8,0,0.05835199852784475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,0,1,fp8,fp8,0,0.0547680010398229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,0,1,float16,float16,0,0.058650667468706764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,0,1,float16,fp8,0,0.05904000004132589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,0,1,fp8,fp8,0,0.05482133229573568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,0,1,float16,float16,0,0.059215997656186424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,0,1,fp8,fp8,0,0.05386666456858317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,0,1,float16,float16,0,0.05991466840108236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,0,1,float16,fp8,0,0.05869866907596588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,0,1,fp8,fp8,0,0.17610132694244385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,0,1,float16,float16,0,0.042634665966033936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,0,1,float16,fp8,0,0.04385599990685781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,0,1,fp8,fp8,0,0.040991999208927155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,0,1,float16,float16,0,0.03999999910593033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,0,1,float16,fp8,0,0.04159999887148539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,0,1,fp8,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,0,1,float16,float16,0,0.0408746674656868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,0,1,float16,fp8,0,0.04014399896065394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,0,1,fp8,fp8,0,0.03755733370780945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,0,1,float16,float16,0,0.0397119993964831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,0,1,float16,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,0,1,fp8,fp8,0,0.03917866696914037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,0,1,float16,float16,0,0.04188266893227895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,0,1,float16,fp8,0,0.041690667470296226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,0,1,fp8,fp8,0,0.03859733293453852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,0,1,float16,float16,0,0.02923733244339625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,0,1,float16,fp8,0,0.060122668743133545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,0,1,fp8,fp8,0,0.029215998947620392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,0,1,float16,float16,0,0.028197333216667175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,0,1,fp8,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,0,1,float16,float16,0,0.027962667246659596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,0,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,0,1,fp8,fp8,0,0.026362667481104534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,0,1,float16,float16,0,0.028160000840822857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,0,1,float16,fp8,0,0.029765332738558452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,0,1,fp8,fp8,0,0.027717334528764088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,0,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,0,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,0,1,fp8,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,0,1,float16,float16,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,0,1,float16,fp8,0,0.02553066611289978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,0,1,float16,float16,0,0.02502399931351344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,0,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,0,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,0,1,fp8,fp8,0,0.05862933397293091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,0,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,0,1,float16,fp8,0,0.023962666591008503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,0,1,fp8,fp8,0,0.02497600018978119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,0,1,float16,float16,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,0,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,0,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,0,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,0,1,float16,fp8,0,0.021690666675567627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,0,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,0,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,0,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,0,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,0,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,0,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,0,1,float16,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,0,1,float16,float16,0,0.35900266965230304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,0,1,float16,fp8,0,0.36054933071136475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,0,1,fp8,fp8,0,0.36735999584198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,0,1,float16,float16,0,0.3660800059636434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,0,1,float16,fp8,0,0.36559998989105225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,0,1,fp8,fp8,0,0.37094934781392414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,0,1,float16,float16,0,0.027813332776228588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,0,1,float16,float16,0,0.3718506495157878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,0,1,float16,fp8,0,0.37194665273030597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,0,1,fp8,fp8,0,0.37646400928497314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,0,1,float16,float16,0,0.38414935270945233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,0,1,float16,fp8,0,0.38091198603312176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,0,1,fp8,fp8,0,0.3973333438237508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,0,1,float16,float16,0,0.22612265745798746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,0,1,float16,fp8,0,0.22104533513387045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,0,1,fp8,fp8,0,0.23122133811314902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,0,1,fp8,fp8,0,0.020453333854675293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,0,1,float16,fp8,0,0.18995199600855509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,0,1,fp8,fp8,0,0.18680532773335776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,0,1,float16,float16,0,0.19216533501942953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,0,1,float16,fp8,0,0.1932906707127889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,0,1,fp8,fp8,0,0.1925706664721171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,0,1,float16,float16,0,0.19518399238586426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,0,1,float16,fp8,0,0.19337066014607748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,0,1,fp8,fp8,0,0.19596266746520996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,0,1,float16,float16,0,0.20012799898783365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,0,1,float16,float16,0,0.19011733929316202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,0,1,float16,fp8,0,0.20012799898783365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,0,1,fp8,fp8,0,0.2053813338279724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,0,1,float16,float16,0,0.12239467104276021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,0,1,fp8,fp8,0,0.1276533305644989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,0,1,float16,float16,0,0.1042080024878184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,0,1,fp8,fp8,0,0.10136000315348308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,0,1,float16,fp8,0,0.10421866178512573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,0,1,float16,fp8,0,0.10505066315333049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,0,1,fp8,fp8,0,0.10591999689737956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,0,1,float16,float16,0,0.10569066802660625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,0,1,float16,fp8,0,0.10804266730944316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,0,1,fp8,fp8,0,0.10807999968528748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,0,1,float16,float16,0,0.11170666416486104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,0,1,fp8,fp8,0,0.11404800415039062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,0,1,float16,fp8,0,0.12006400028864543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,0,1,float16,fp8,0,0.06816000243028005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,0,1,fp8,fp8,0,0.07458666463692983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,0,1,float16,float16,0,0.060864001512527466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,0,1,float16,fp8,0,0.06042666733264923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,0,1,fp8,fp8,0,0.05614933371543884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,0,1,float16,float16,0,0.06020266811052958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,0,1,float16,fp8,0,0.11000532905260722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,0,1,float16,fp8,0,0.06072533130645752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,0,1,fp8,fp8,0,0.058245331048965454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,0,1,float16,float16,0,0.06211199859778086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,0,1,float16,fp8,0,0.062277331948280334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,0,1,fp8,fp8,0,0.05826666454474131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,0,1,float16,float16,0,0.06196799874305725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,0,1,float16,fp8,0,0.06247999767462412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,0,1,fp8,fp8,0,0.060549333691596985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,0,1,float16,float16,0,0.04063999901215235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,0,1,float16,fp8,0,0.04002666721741358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,0,1,fp8,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,0,1,float16,float16,0,0.03832533210515976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,0,1,float16,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,0,1,fp8,fp8,0,0.03732266773780187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,0,1,float16,float16,0,0.03751999884843826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,0,1,float16,fp8,0,0.03951466580231985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,0,1,fp8,fp8,0,0.03573333223660787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,0,1,float16,float16,0,0.039488000174363456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,0,1,float16,fp8,0,0.039861333866914116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,0,1,fp8,fp8,0,0.03751999884843826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,0,1,float16,float16,0,0.039861333866914116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,0,1,float16,fp8,0,0.038719999293486275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,0,1,fp8,fp8,0,0.03751466671625773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,0,1,float16,float16,0,0.02920000006755193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,0,1,float16,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,0,1,fp8,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,0,1,float16,float16,0,0.10571199655532837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,0,1,float16,fp8,0,0.028581333657105763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,0,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,0,1,float16,float16,0,0.027269333600997925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,0,1,float16,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,0,1,fp8,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,0,1,float16,float16,0,0.02890666574239731
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,0,1,float16,fp8,0,0.027776000400384266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,0,1,fp8,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,0,1,float16,float16,0,0.028165332973003387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,0,1,float16,fp8,0,0.02829866607983907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,0,1,fp8,fp8,0,0.029120000700155895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,0,1,float16,float16,0,0.02180800090233485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,0,1,float16,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,0,1,fp8,fp8,0,0.02239466706911723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,0,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,0,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,0,1,float16,float16,0,0.02755733331044515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,0,1,float16,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,0,1,float16,float16,0,0.022533332308133442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,0,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,0,1,float16,float16,0,0.01759999990463257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,0,1,float16,fp8,0,0.01836266616980235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,0,1,float16,float16,0,0.017583999782800674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,0,1,float16,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,0,1,fp8,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,0,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,0,1,float16,float16,0,0.07090666890144348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,0,1,float16,float16,0,0.019530666371186573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,0,1,float16,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,0,1,float16,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,0,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,0,1,float16,float16,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,0,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,0,1,float16,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,0,1,float16,float16,0,0.23701866467793783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,0,1,float16,fp8,0,0.23688532908757529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,0,1,fp8,fp8,0,0.23553067445755005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,0,1,float16,fp8,0,0.2402133345603943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,0,1,float16,float16,0,0.23941334088643393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,0,1,fp8,fp8,0,0.24246933062871298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,0,1,float16,float16,0,0.24090667565663657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,0,1,float16,fp8,0,0.24136000871658325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,0,1,fp8,fp8,0,0.24585066239039102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,0,1,float16,float16,0,0.24740266799926758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,0,1,float16,fp8,0,0.24731733401616415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,0,1,float16,float16,0,0.1443946659564972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,0,1,fp8,fp8,0,0.14850133657455444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,0,1,float16,float16,0,0.12804800271987915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,0,1,float16,fp8,0,0.12668800354003906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,0,1,fp8,fp8,0,0.12359467148780823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,0,1,float16,float16,0,0.12807466586430868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,0,1,float16,fp8,0,0.12812800208727518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,0,1,fp8,fp8,0,0.12904000282287598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,0,1,float16,float16,0,0.12961600224177042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,0,1,float16,fp8,0,0.1301653285821279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,0,1,fp8,fp8,0,0.13177067041397095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,0,1,float16,float16,0,0.1325279970963796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,0,1,fp8,fp8,0,0.2539733250935872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,0,1,float16,fp8,0,0.13211199641227722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,0,1,float16,fp8,0,0.14260266224543253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,0,1,float16,fp8,0,0.08019199967384338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,0,1,fp8,fp8,0,0.0851093331972758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,0,1,float16,float16,0,0.07264000177383423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,0,1,float16,fp8,0,0.07237866520881653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,0,1,fp8,fp8,0,0.06860266625881195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,0,1,float16,float16,0,0.07250133156776428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,0,1,float16,fp8,0,0.07195200026035309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,0,1,fp8,fp8,0,0.06824533144632976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,0,1,float16,float16,0,0.07272533575693767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,0,1,float16,fp8,0,0.0730453332265218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,0,1,fp8,fp8,0,0.06912533442179362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,0,1,float16,float16,0,0.0731573353211085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,0,1,float16,fp8,0,0.07336533566315968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,0,1,fp8,fp8,0,0.07397866745789845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,0,1,fp8,fp8,0,0.1365120013554891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,0,1,float16,fp8,0,0.04773866633574168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,0,1,fp8,fp8,0,0.045925334095954895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,0,1,float16,float16,0,0.044346665342648826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,0,1,float16,fp8,0,0.04398933549722036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,0,1,fp8,fp8,0,0.04268800218900045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,0,1,float16,float16,0,0.04558933277924856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,0,1,float16,fp8,0,0.04614933331807455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,0,1,fp8,fp8,0,0.043653334180514015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,0,1,float16,float16,0,0.045978665351867676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,0,1,float16,fp8,0,0.044693330923716225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,0,1,fp8,fp8,0,0.04269866645336151
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,0,1,float16,float16,0,0.045141334335009255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,0,1,float16,fp8,0,0.04497066636880239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,0,1,fp8,fp8,0,0.04363200068473816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,0,1,float16,float16,0,0.02956266701221466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,0,1,float16,fp8,0,0.030975999931494396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,0,1,float16,float16,0,0.047135998805363975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,0,1,float16,float16,0,0.08106666803359985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,0,1,float16,fp8,0,0.029701332251230877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,0,1,fp8,fp8,0,0.029125332832336426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,0,1,float16,float16,0,0.030752000709374745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,0,1,float16,fp8,0,0.031023999055226643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,0,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,0,1,float16,float16,0,0.029717333614826202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,0,1,float16,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,0,1,fp8,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,0,1,float16,float16,0,0.03166933357715607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,0,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,0,1,fp8,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,0,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,0,1,float16,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,0,1,fp8,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,0,1,fp8,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,0,1,float16,float16,0,0.023178666830062866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,0,1,float16,fp8,0,0.023546665906906128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,0,1,float16,float16,0,0.023546665906906128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,0,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,0,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,0,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,0,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,0,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,0,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,0,1,float16,float16,0,0.01749333366751671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,0,1,fp8,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,0,1,float16,float16,0,0.029845332105954487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,0,1,float16,float16,0,0.01578666642308235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,0,1,fp8,fp8,0,0.015840000162522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,0,1,float16,float16,0,0.01637866720557213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,0,1,float16,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,0,1,float16,float16,0,0.01573333392540614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,0,1,float16,float16,0,0.17483200629552206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,0,1,float16,fp8,0,0.17591466506322226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,0,1,fp8,fp8,0,0.17309866348902384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,0,1,float16,float16,0,0.17620799938837686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,0,1,float16,fp8,0,0.17509865760803223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,0,1,fp8,fp8,0,0.17709332704544067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,0,1,float16,fp8,0,0.17534399032592773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,0,1,fp8,fp8,0,0.17973866065343222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,0,1,float16,float16,0,0.17938133080800375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,0,1,float16,fp8,0,0.17951999107996622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,0,1,fp8,fp8,0,0.1852160096168518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,0,1,float16,float16,0,0.10346133510271709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,0,1,float16,fp8,0,0.10334933797518413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,0,1,fp8,fp8,0,0.10951466361681621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,0,1,float16,float16,0,0.09702400366465251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,0,1,fp8,fp8,0,0.0176959993938605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,0,1,fp8,fp8,0,0.09300800164540608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,0,1,float16,float16,0,0.17765865723292032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,0,1,float16,fp8,0,0.09587732950846355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,0,1,fp8,fp8,0,0.09382933378219604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,0,1,float16,float16,0,0.0972106655438741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,0,1,float16,fp8,0,0.09542399644851685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,0,1,fp8,fp8,0,0.0937546690305074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,0,1,float16,float16,0,0.09666132926940918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,0,1,float16,fp8,0,0.09734400113423665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,0,1,fp8,fp8,0,0.09750399986902873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,0,1,float16,float16,0,0.058277333776156105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,0,1,float16,fp8,0,0.0583840012550354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,0,1,fp8,fp8,0,0.05795733133951823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,0,1,float16,float16,0,0.05621333420276642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,0,1,float16,fp8,0,0.056346664826075234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,0,1,float16,float16,0,0.09531733393669128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,0,1,float16,float16,0,0.05579733351866404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,0,1,float16,fp8,0,0.05799466868241628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,0,1,fp8,fp8,0,0.05442133545875549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,0,1,float16,float16,0,0.0580320010582606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,0,1,float16,fp8,0,0.05726400017738342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,0,1,fp8,fp8,0,0.05545066793759664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,0,1,float16,float16,0,0.05817066629727682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,0,1,float16,fp8,0,0.056426664193471275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,0,1,fp8,fp8,0,0.05610666672388712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,0,1,float16,float16,0,0.03753600021203359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,0,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,0,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,0,1,float16,float16,0,0.035360001027584076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,0,1,float16,fp8,0,0.09520533680915833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,0,1,float16,fp8,0,0.03570666660865148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,0,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,0,1,float16,float16,0,0.0374293327331543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,0,1,float16,fp8,0,0.03748800108830134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,0,1,fp8,fp8,0,0.036874666810035706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,0,1,float16,float16,0,0.03748266647259394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,0,1,fp8,fp8,0,0.035946667194366455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,0,1,float16,float16,0,0.03760000069936117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,0,1,float16,fp8,0,0.03773866593837738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,0,1,fp8,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,0,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,0,1,float16,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,0,1,fp8,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,0,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,0,1,float16,fp8,0,0.03730666637420654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,0,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,0,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,0,1,float16,float16,0,0.025050667424996693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,0,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,0,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,0,1,float16,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,0,1,fp8,fp8,0,0.05575466652711233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,0,1,float16,float16,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,0,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,0,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,0,1,float16,float16,0,0.02094399929046631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,0,1,float16,fp8,0,0.020773333807786305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,0,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,0,1,float16,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,0,1,float16,float16,0,0.028186666468779247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,0,1,float16,float16,0,0.017637333522240322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,0,1,float16,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,0,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,0,1,float16,float16,0,0.01995733380317688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,0,1,float16,float16,0,0.14723199605941772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,0,1,float16,fp8,0,0.14669332901636759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,0,1,fp8,fp8,0,0.14648000399271646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,0,1,float16,float16,0,0.14681067069371542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,0,1,float16,fp8,0,0.1483466625213623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,0,1,float16,float16,0,0.14847466349601746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,0,1,float16,fp8,0,0.14779200156529745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,0,1,fp8,fp8,0,0.14641599853833517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,0,1,float16,float16,0,0.14904533823331198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,0,1,float16,fp8,0,0.1483840048313141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,0,1,fp8,fp8,0,0.15010666847229004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,0,1,float16,fp8,0,0.017759999881188076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,0,1,float16,fp8,0,0.08294933537642162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,0,1,fp8,fp8,0,0.08430932958920796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,0,1,fp8,fp8,0,0.14595199624697366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,0,1,float16,float16,0,0.08107199768225352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,0,1,float16,fp8,0,0.08306133250395457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,0,1,fp8,fp8,0,0.08109866579373677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,0,1,float16,float16,0,0.08301333089669545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,0,1,float16,fp8,0,0.08292800188064575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,0,1,fp8,fp8,0,0.08132266501585643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,0,1,float16,float16,0,0.08163733283678691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,0,1,float16,float16,0,0.0831520011027654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,0,1,fp8,fp8,0,0.08065066734949748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,0,1,float16,float16,0,0.08229866623878479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,0,1,float16,fp8,0,0.08284266789754231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,0,1,fp8,fp8,0,0.08247999846935272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,0,1,float16,fp8,0,0.05009066561857859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,0,1,fp8,fp8,0,0.05009600023428599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,0,1,float16,float16,0,0.047685335079828896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,0,1,float16,fp8,0,0.050053333242734276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,0,1,fp8,fp8,0,0.04953599969546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,0,1,float16,fp8,0,0.04966933528582255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,0,1,fp8,fp8,0,0.049498667319615684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,0,1,float16,float16,0,0.04990399877230326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,0,1,float16,fp8,0,0.05006400247414907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,0,1,fp8,fp8,0,0.05007466673851013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,0,1,float16,float16,0,0.05004799862702688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,0,1,float16,fp8,0,0.04997866849104563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,0,1,fp8,fp8,0,0.05008000135421753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,0,1,float16,float16,0,0.033610666791598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,0,1,float16,float16,0,0.05041066805521647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,0,1,float16,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,0,1,fp8,fp8,0,0.03430933256944021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,0,1,float16,float16,0,0.04910933474699656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,0,1,float16,fp8,0,0.033957332372665405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,0,1,fp8,fp8,0,0.031850665807724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,0,1,float16,float16,0,0.033701332906881966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,0,1,fp8,fp8,0,0.03379733363787333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,0,1,float16,float16,0,0.0340693344672521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,0,1,float16,fp8,0,0.033999999364217125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,0,1,fp8,fp8,0,0.033861334125200905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,0,1,float16,float16,0,0.03391999999682108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,0,1,float16,float16,0,0.03158933420976003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,0,1,fp8,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,0,1,float16,float16,0,0.02306666721900304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,0,1,float16,fp8,0,0.03342933456103007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,0,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,0,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,0,1,float16,float16,0,0.02306666721900304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,0,1,float16,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,0,1,float16,float16,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,0,1,float16,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,0,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,0,1,float16,float16,0,0.022597332795461018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,0,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,0,1,fp8,fp8,0,0.022778667509555817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,0,1,float16,float16,0,0.02295999974012375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,0,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,0,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,0,1,float16,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,0,1,float16,float16,0,0.019567999988794327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,0,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,0,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,0,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,0,1,float16,float16,0,0.018863999595244724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,0,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,0,1,float16,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,0,1,float16,fp8,0,0.08261333405971527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,0,1,float16,float16,0,0.01571200042963028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,0,1,float16,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,0,1,fp8,fp8,0,0.016682667036851246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,0,1,fp8,fp8,0,0.01568000018596649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,128,0,1,float16,float16,0,0.12571199735005698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,128,0,1,float16,fp8,0,0.12592533230781555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,128,0,1,float16,float16,0,0.12570666273434958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,128,0,1,float16,fp8,0,0.12578133742014566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,128,0,1,float16,float16,0,0.12589866916338602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,128,0,1,float16,fp8,0,0.12615467111269632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,4,128,0,1,fp8,fp8,0,0.1218826671441396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,128,0,1,float16,float16,0,0.12596266468365988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,128,0,1,float16,fp8,0,0.12574932972590128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,8,128,0,1,fp8,fp8,0,0.12376532951990764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,1,128,0,1,fp8,fp8,0,0.12184000015258789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,0,0.07107733190059662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,128,0,1,fp8,fp8,0,0.06821866830190022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,128,0,1,float16,float16,0,0.07049066821734111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,128,0,1,float16,fp8,0,0.07051200171311696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,1,128,0,1,fp8,fp8,0,0.06845866640408833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,128,0,1,float16,float16,0,0.0703359991312027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,128,0,1,float16,fp8,0,0.07087466617425282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,2,128,0,1,fp8,fp8,0,0.06840533514817555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,128,0,1,float16,float16,0,0.07045333087444305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,128,0,1,float16,fp8,0,0.07061866422494252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,4,128,0,1,fp8,fp8,0,0.06836266815662384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,128,0,1,float16,float16,0,0.07020799815654755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,128,0,1,float16,fp8,0,0.07041599849859874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,8,128,0,1,fp8,fp8,0,0.06841599941253662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,0,0.04378666480382284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,128,0,1,fp8,fp8,0,0.04229333500067393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,24,2,128,0,1,fp8,fp8,0,0.12190399567286174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,128,0,1,float16,fp8,0,0.04368533194065094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,128,0,1,fp8,fp8,0,0.04144533226887385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,128,0,1,float16,float16,0,0.043696001172065735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,128,0,1,float16,fp8,0,0.04375466704368591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,0,0.07080533107121785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,2,128,0,1,fp8,fp8,0,0.04190400242805481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,0,0.04186666508515676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,128,0,1,float16,fp8,0,0.043653334180514015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,128,0,1,fp8,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,128,0,1,float16,float16,0,0.04193066557248434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,128,0,1,float16,fp8,0,0.04379733403523763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,8,128,0,1,fp8,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,0,0.029637334247430164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,128,0,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,128,0,1,float16,float16,0,0.02977599948644638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,128,0,1,float16,fp8,0,0.02994133283694585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,1,128,0,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,128,0,1,float16,float16,0,0.029696000119050343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,4,128,0,1,float16,float16,0,0.04387733340263367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,128,0,1,float16,fp8,0,0.02976000060637792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,2,128,0,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,128,0,1,float16,float16,0,0.029525332152843475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,128,0,1,float16,fp8,0,0.03033066789309184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,4,128,0,1,fp8,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,128,0,1,float16,float16,0,0.02958933264017105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,128,0,1,float16,fp8,0,0.03032533327738444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,8,128,0,1,fp8,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,0,0.023168000082174938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,24,128,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,128,0,1,float16,float16,0,0.023738667368888855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,128,0,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,1,128,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,128,0,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,128,0,1,float16,fp8,0,0.024677333732446034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,2,128,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,128,0,1,float16,float16,0,0.025045332809289295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,128,0,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,4,128,0,1,fp8,fp8,0,0.0227360005180041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,128,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,128,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,24,8,128,0,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,0,0.019424000134070713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,24,128,0,1,fp8,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,128,0,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,128,0,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,1,128,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,128,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,128,0,1,float16,fp8,0,0.02083733429511388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,2,128,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,128,0,1,float16,float16,0,0.021029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,128,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,4,128,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,24,8,128,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,24,1,128,0,1,float16,float16,0,0.04370133578777313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,128,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,1,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,2,128,0,1,fp8,fp8,0,0.016176000237464905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,0,0.01623999948302905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,4,128,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,24,8,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,24,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,128,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,2,128,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,4,128,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,128,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,8,128,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,24,128,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,1,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,128,0,1,float16,float16,0,0.015568000574906668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,2,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,128,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,128,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,4,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,128,0,1,float16,float16,0,0.016693333784739178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,128,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,24,8,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,0,0.02951466788848241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,24,1,128,0,1,float16,fp8,0,0.017994667092959087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,0,1,fp8,fp8,0,5.606218973795573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,float16,0,7.721285502115886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,float16,0,7.376010894775391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,fp8,0,7.315877278645833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,fp8,0,7.360703786214192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,0,1,fp8,fp8,0,5.6273759206136065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,float16,0,7.474469502766927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,0,1,fp8,fp8,0,5.697765350341797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,fp8,0,7.255498886108398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,float16,0,7.585642496744792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,float16,0,3.5909172693888345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,fp8,0,7.514474868774414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,0,1,fp8,fp8,0,5.732863744099935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,fp8,0,3.8811305363972983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,0,1,fp8,fp8,0,2.9939359029134116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,float16,0,3.5933653513590493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,fp8,0,3.7731946309407554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,0,1,fp8,fp8,0,2.899322509765625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,float16,0,3.5651413599650064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,fp8,0,4.287541389465332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,0,1,fp8,fp8,0,2.9063520431518555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,float16,0,3.6479625701904297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,fp8,0,3.393648147583008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,0,1,fp8,fp8,0,2.917429288228353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,float16,0,3.4733012517293296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,float16,0,1.7600800196329753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,0,1,fp8,fp8,0,2.9601920445760093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,fp8,0,3.6874240239461265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,fp8,0,1.7759733200073242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,0,1,fp8,fp8,0,1.5980854034423828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,float16,0,1.6911892890930176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,0,1,fp8,fp8,0,1.5473546981811523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,float16,0,1.7244480450948079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,fp8,0,1.7038027445475261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,0,1,fp8,fp8,0,1.5497600237528484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,fp8,0,1.736037254333496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,fp8,0,1.778549353281657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,float16,0,1.8042346636454265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,fp8,0,1.7248586018880208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,float16,0,1.7284107208251953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,0,1,fp8,fp8,0,1.566816012064616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,0,1,fp8,fp8,0,1.5696853001912434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,fp8,0,0.9849119981129965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,0,1,fp8,fp8,0,0.8987360000610352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,float16,0,0.9718186855316162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,fp8,0,0.9582080046335856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,0,1,fp8,fp8,0,0.8761119842529297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,float16,0,0.9877706368764242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,float16,0,0.9688746929168701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,fp8,0,0.9537279605865479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,0,1,fp8,fp8,0,0.8760800361633301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,float16,0,0.960757335027059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,fp8,0,0.9666186968485514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,0,1,fp8,fp8,0,0.8802933692932129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,float16,0,0.968010663986206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,fp8,0,0.970858653386434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,0,1,fp8,fp8,0,0.888213316599528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,float16,0,3.917776107788086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,fp8,0,4.191754659016927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,0,1,fp8,fp8,0,3.373296101888021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,float16,0,4.331119855244954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,fp8,0,4.012639999389648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,float16,0,4.401845296223958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,0,1,fp8,fp8,0,3.391615867614746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,fp8,0,4.312410672505696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,float16,0,4.1944427490234375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,0,1,fp8,fp8,0,3.391903877258301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,float16,0,2.0357492764790854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,0,1,fp8,fp8,0,3.434666633605957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,fp8,0,2.0482239723205566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,0,1,fp8,fp8,0,1.8265760739644368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,float16,0,1.9336907068888347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,fp8,0,4.286304155985515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,fp8,0,1.93013334274292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,0,1,fp8,fp8,0,1.7570187250773113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,float16,0,2.0472373962402344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,fp8,0,1.963754653930664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,0,1,fp8,fp8,0,1.75601593653361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,float16,0,1.9825013478597004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,fp8,0,2.0237600008646646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,0,1,fp8,fp8,0,1.7657972971598308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,float16,0,1.9809600512186687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,fp8,0,2.0081440607706704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,float16,0,1.0757919947306316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,0,1,fp8,fp8,0,1.7870133717854817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,fp8,0,1.074933369954427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,0,1,fp8,fp8,0,1.1465919812520344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,float16,0,1.0451359748840332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,fp8,0,1.0516640345255535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,0,1,fp8,fp8,0,0.9552640120188395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,float16,0,1.0604266325632732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,fp8,0,1.0534186363220215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,float16,0,1.0445120334625244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,fp8,0,1.0713706811269124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,0,1,fp8,fp8,0,0.9691519737243652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,float16,0,1.0530239741007488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,fp8,0,1.0782026449839275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,0,1,fp8,fp8,0,0.9755146503448486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,0,1,fp8,fp8,0,0.9558453559875488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,0,1,fp8,fp8,0,0.5738453467686971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,float16,0,0.6044906775156657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,fp8,0,0.6014666557312012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,0,1,fp8,fp8,0,0.5543946822484335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,float16,0,0.6027733484903971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,float16,0,0.623802661895752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,fp8,0,0.6078933477401733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,0,1,fp8,fp8,0,0.558458685874939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,fp8,0,0.6271413167317709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,float16,0,0.6079039971033732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,fp8,0,0.6139146486918131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,float16,0,0.6158239841461182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,fp8,0,0.6129279931386312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,0,1,fp8,fp8,0,0.5643146832784017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,0,1,fp8,fp8,0,0.5601066748301188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,0,1,fp8,fp8,0,2.454026699066162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,float16,0,2.796426773071289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,fp8,0,2.774517377217611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,float16,0,2.9612960815429688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,fp8,0,2.8089386622111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,0,1,fp8,fp8,0,2.4509600003560386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,float16,0,2.8093439737955728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,0,1,fp8,fp8,0,2.477349281311035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,fp8,0,2.948261260986328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,float16,0,1.6549545923868816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,float16,0,3.021333376566569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,0,1,fp8,fp8,0,2.5184106826782227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,fp8,0,3.0983200073242188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,fp8,0,1.5229280789693196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,0,1,fp8,fp8,0,1.339695930480957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,float16,0,1.408906618754069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,fp8,0,1.421354611714681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,0,1,fp8,fp8,0,1.2827253341674805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,float16,0,1.417967955271403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,fp8,0,1.4180960655212402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,0,1,fp8,fp8,0,1.2870559692382812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,float16,0,1.4455199241638184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,0,1,fp8,fp8,0,1.292741298675537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,float16,0,1.452624003092448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,fp8,0,1.477450688680013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,float16,0,0.8028799692789713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,fp8,0,0.8779573440551758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,fp8,0,1.4358240763346355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,float16,0,0.7798933188120524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,fp8,0,0.7745973269144694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,0,1,fp8,fp8,0,0.7093866666158041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,0,1,fp8,fp8,0,1.3152053356170654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,float16,0,0.7814773718516032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,fp8,0,0.78438933690389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,float16,0,0.7875253359476725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,fp8,0,0.7862293720245361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,0,1,fp8,fp8,0,0.7392906347910563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,0,1,fp8,fp8,0,0.7301440238952637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,float16,0,0.8043786684672037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,fp8,0,0.8066133658091227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,float16,0,0.4811466534932454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,fp8,0,0.4808746576309204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,0,1,fp8,fp8,0,0.7128586769104004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,0,1,fp8,fp8,0,0.4373013178507487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,float16,0,0.45867733160654706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,0,1,fp8,fp8,0,0.4198773304621379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,float16,0,0.4552319844563802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,0,1,fp8,fp8,0,0.7362399895985922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,0,1,fp8,fp8,0,0.4216906627019246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,float16,0,0.46214401721954346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,fp8,0,0.4561386505762736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,0,1,fp8,fp8,0,0.4249279896418254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,float16,0,0.46056000391642254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,fp8,0,0.47091734409332275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,0,1,fp8,fp8,0,0.43065067132314044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,fp8,0,0.46264533201853436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,fp8,0,0.4610559940338135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,float16,0,3.9983040491739907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,fp8,0,3.871397336324056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,0,1,fp8,fp8,0,3.2470293045043945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,float16,0,3.8979199727376304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,0,1,fp8,fp8,0,3.2659358978271484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,fp8,0,4.089413324991862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,float16,0,3.914016087849935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,fp8,0,4.011877377827962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,0,1,fp8,fp8,0,3.2793601353963218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,float16,0,4.128389358520508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,fp8,0,4.072090784708659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,float16,0,1.9494239489237468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,0,1,fp8,fp8,0,3.3484268188476562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,0,1,fp8,fp8,0,1.9891093571980794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,float16,0,1.8582827250162761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,fp8,0,1.864554723103841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,0,1,fp8,fp8,0,1.6669227282206218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,float16,0,1.8520213762919109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,fp8,0,1.9848106702168782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,fp8,0,1.8427573839823406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,float16,0,1.892965316772461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,fp8,0,1.854698657989502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,0,1,fp8,fp8,0,1.6896959940592449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,float16,0,1.888271967569987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,0,1,fp8,fp8,0,1.6721493403116863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,0,1,fp8,fp8,0,1.7204906145731609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,float16,0,1.0250613689422607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,fp8,0,1.0514986515045166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,0,1,fp8,fp8,0,0.9390827020009359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,float16,0,0.9700427055358887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,fp8,0,0.9867093563079834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,0,1,fp8,fp8,0,0.8901546796162924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,fp8,0,1.9279893239339192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,float16,0,0.9738399982452393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,fp8,0,0.9902666409810384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,0,1,fp8,fp8,0,0.8918186823527018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,float16,0,0.9802079995473226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,fp8,0,0.9858506520589193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,0,1,fp8,fp8,0,0.8971412976582845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,float16,0,0.9982240200042725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,fp8,0,1.0002720355987549
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,0,1,fp8,fp8,0,0.9240319728851318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,fp8,0,0.5923306544621786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,0,1,fp8,fp8,0,0.5262346665064493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,float16,0,0.5375039974848429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,fp8,0,0.542469342549642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,0,1,fp8,fp8,0,0.4992213249206543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,float16,0,0.5442879994710287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,fp8,0,0.5454346736272176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,0,1,fp8,fp8,0,0.5010240077972412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,float16,0,0.5493599971135458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,fp8,0,0.5464373429616293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,float16,0,0.5701440175374349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,float16,0,0.5529813369115194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,fp8,0,0.5595359802246094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,0,1,fp8,fp8,0,0.5130613247553507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,float16,0,0.3413920005162557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,fp8,0,0.3463786840438843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,0,1,fp8,fp8,0,0.31830400228500366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,float16,0,0.32869867483774823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,fp8,0,0.32842665910720825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,0,1,fp8,fp8,0,0.3032640020052592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,float16,0,0.32734400033950806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,fp8,0,0.3290506601333618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,0,1,fp8,fp8,0,0.3014346758524577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,float16,0,0.32924799124399823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,0,1,fp8,fp8,0,0.5049706697463989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,0,1,fp8,fp8,0,0.3069919943809509
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,float16,0,0.33342401186625165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,fp8,0,0.341103990872701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,0,1,fp8,fp8,0,0.3121546705563863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,float16,0,2.2225066820780435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,0,1,fp8,fp8,0,1.9971893628438313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,fp8,0,0.3314453363418579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,float16,0,2.237050692240397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,fp8,0,2.3130079905192056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,fp8,0,2.2652907371520996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,float16,0,2.2513012886047363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,0,1,fp8,fp8,0,2.013264020284017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,0,1,fp8,fp8,0,2.034053325653076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,float16,0,2.419269402821859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,fp8,0,2.2610346476236978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,fp8,0,2.32369597752889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,float16,0,1.2181813716888428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,fp8,0,1.2465226650238037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,float16,0,1.1807093620300293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,fp8,0,1.1861226558685303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,0,1,fp8,fp8,0,1.1289386749267578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,float16,0,1.175551970799764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,0,1,fp8,fp8,0,1.1160746415456135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,fp8,0,1.1522400379180908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,0,1,fp8,fp8,0,1.0406560103098552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,float16,0,1.1462186972300212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,fp8,0,1.1576800346374512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,0,1,fp8,fp8,0,1.0524746576944988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,float16,0,1.163925329844157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,fp8,0,1.2040212949117024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,0,1,fp8,fp8,0,1.0747839609781902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,fp8,0,0.6823893388112386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,0,1,fp8,fp8,0,0.60043732325236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,float16,0,0.6122080087661743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,fp8,0,0.6260746717453003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,0,1,fp8,fp8,0,2.080736001332601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,float16,0,0.6203200022379557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,float16,0,0.6434933344523112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,0,1,fp8,fp8,0,0.5646453301111857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,float16,0,0.6191519896189371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,fp8,0,0.6282080014546713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,0,1,fp8,fp8,0,0.5704480012257894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,float16,0,0.6418026685714722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,0,1,fp8,fp8,0,0.5630559921264648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,fp8,0,0.6297279993693033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,0,1,fp8,fp8,0,0.581658681233724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,fp8,0,0.6207199891408285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,0,1,fp8,fp8,0,0.34514133135477704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,float16,0,0.35098667939503986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,fp8,0,0.35859731833140057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,0,1,fp8,fp8,0,0.3223839998245239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,float16,0,0.35897600650787354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,fp8,0,0.35259731610616046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,0,1,fp8,fp8,0,0.32522133986155194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,float16,0,0.3575199842453003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,fp8,0,0.360426664352417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,0,1,fp8,fp8,0,0.3298933307329814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,float16,0,0.366592009862264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,fp8,0,0.36668264865875244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,0,1,fp8,fp8,0,0.3346293369928996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,float16,0,0.23052799701690674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,fp8,0,0.23148266474405924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,0,1,fp8,fp8,0,0.21613866090774536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,float16,0,0.22113066911697388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,fp8,0,0.2194933295249939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,0,1,fp8,fp8,0,0.20568533738454184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,float16,0,0.21885865926742554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,fp8,0,0.22356800238291422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,0,1,fp8,fp8,0,0.20349333683649698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,float16,0,0.2225386699040731
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,fp8,0,0.21794132391611734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,0,1,fp8,fp8,0,0.20589866240819296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,float16,0,0.21983466545740762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,fp8,0,0.22795732816060385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,0,1,fp8,fp8,0,0.20835200945536295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,float16,0,0.3750133514404297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,fp8,0,0.37437331676483154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,float16,0,2.303797403971354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,fp8,0,2.257754643758138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,0,1,fp8,fp8,0,2.051461378733317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,float16,0,2.2562294006347656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,0,1,fp8,fp8,0,2.066703955332438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,float16,0,2.3050452868143716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,fp8,0,2.3235626220703125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,0,1,fp8,fp8,0,2.0880053838094077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,fp8,0,2.352714697519938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,fp8,0,2.3585972785949707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,0,1,fp8,fp8,0,2.1485066413879395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,float16,0,1.239888032277425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,fp8,0,1.2418453693389893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,0,1,fp8,fp8,0,1.1461280186971028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,float16,0,1.1514240105946858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,float16,0,2.386064052581787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,0,1,fp8,fp8,0,1.0462026596069336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,float16,0,1.1588586966196697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,fp8,0,1.1553973356882732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,0,1,fp8,fp8,0,1.0499306519826253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,float16,0,1.1808266639709473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,fp8,0,1.1594239870707195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,fp8,0,1.1413226922353108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,float16,0,1.2483253479003906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,fp8,0,1.1980000336964924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,0,1,fp8,fp8,0,1.0944693088531494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,fp8,0,0.6779146989186605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,0,1,fp8,fp8,0,0.6041973431905111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,float16,0,0.5994346539179484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,fp8,0,0.6016266743342081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,0,1,fp8,fp8,0,0.5531680186589559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,float16,0,0.6033066511154175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,fp8,0,0.6100800037384033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,0,1,fp8,fp8,0,0.5560319821039835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,float16,0,0.6151039997736613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,fp8,0,0.6104106505711874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,float16,0,0.6541279951731364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,0,1,fp8,fp8,0,0.563754677772522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,float16,0,0.6443519989649454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,fp8,0,0.6339733203252157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,0,1,fp8,fp8,0,1.0641706784566243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,float16,0,0.3624746799468994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,fp8,0,0.36848000685373944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,float16,0,0.3272053400675456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,0,1,fp8,fp8,0,0.3100000023841858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,float16,0,0.3359520037968953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,0,1,fp8,fp8,0,0.577674667040507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,0,1,fp8,fp8,0,0.31014933188756305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,0,1,fp8,fp8,0,0.3343040148417155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,float16,0,0.3476800123850505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,fp8,0,0.34380801518758136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,0,1,fp8,fp8,0,0.31511465708414715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,fp8,0,0.34063466389973956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,fp8,0,0.35944533348083496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,0,1,fp8,fp8,0,0.32582932710647583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,float16,0,0.2179093360900879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,fp8,0,0.33566399415334064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,0,1,fp8,fp8,0,0.20011732975641885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,float16,0,0.19826134045918783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,fp8,0,0.2004106640815735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,0,1,fp8,fp8,0,0.17986667156219482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,float16,0,0.20138132572174072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,fp8,0,0.20004266500473022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,0,1,fp8,fp8,0,0.18305067221323648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,float16,0,0.20139199495315552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,fp8,0,0.20413333177566528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,0,1,fp8,fp8,0,0.1874026656150818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,float16,0,0.20862932999928793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,fp8,0,0.20188800493876138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,fp8,0,0.21900800863901773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,float16,0,0.1346826652685801
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,fp8,0,0.1378933290640513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,0,1,fp8,fp8,0,0.13165866335233053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,float16,0,0.1332319974899292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,fp8,0,0.13888532916704813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,0,1,fp8,fp8,0,0.12429866194725037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,float16,0,0.13159466783205667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,fp8,0,0.1338986655076345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,0,1,fp8,fp8,0,0.12461333473523457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,float16,0,0.13012267152468363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,fp8,0,0.13209600249926248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,0,1,fp8,fp8,0,0.12410666545232137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,float16,0,0.13209600249926248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,0,1,fp8,fp8,0,0.1930826703707377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,0,1,fp8,fp8,0,0.12619733810424805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,float16,0,1.4478665987650554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,fp8,0,1.4248426755269368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,0,1,fp8,fp8,0,1.313040018081665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,float16,0,1.444101333618164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,fp8,0,1.4483572642008464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,0,1,fp8,fp8,0,1.3264373143513997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,fp8,0,0.13241599996884665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,fp8,0,1.4634772936503093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,0,1,fp8,fp8,0,1.3450719515482585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,float16,0,0.3508213361104329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,float16,0,1.501914660135905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,float16,0,0.8129386901855469
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,0,1,fp8,fp8,0,1.3901119232177734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,fp8,0,0.817967971165975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,0,1,fp8,fp8,0,0.7527413368225098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,float16,0,0.7354773680369059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,fp8,0,0.7418933709462484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,0,1,fp8,fp8,0,0.6770079930623373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,fp8,0,1.5056907335917156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,float16,0,0.7442986965179443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,fp8,0,0.7444213231404623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,float16,0,0.7553333441416422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,fp8,0,0.7561013698577881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,0,1,fp8,fp8,0,0.6929919719696045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,float16,0,0.7728853225708008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,float16,0,1.4641119639078777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,0,1,fp8,fp8,0,0.7149919668833414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,0,1,fp8,fp8,0,0.6823573112487793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,fp8,0,0.4394986629486084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,0,1,fp8,fp8,0,0.4041279951731364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,float16,0,0.3981013298034668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,fp8,0,0.4008800188700358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,0,1,fp8,fp8,0,0.3659093379974365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,float16,0,0.39711467425028485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,fp8,0,0.7843093077341715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,fp8,0,0.4007093509038289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,0,1,fp8,fp8,0,0.367573340733846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,float16,0,0.41652266184488934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,fp8,0,0.4079253276189168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,0,1,fp8,fp8,0,0.3736213445663452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,float16,0,0.4199306567509969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,fp8,0,0.42236268520355225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,float16,0,0.24529600143432617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,fp8,0,0.24875199794769287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,0,1,fp8,fp8,0,0.22807466983795166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,float16,0,0.2200266718864441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,fp8,0,0.22169599930445352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,0,1,fp8,fp8,0,0.20618132750193277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,float16,0,0.22197866439819336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,float16,0,0.44101866086324054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,fp8,0,0.2253226637840271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,0,1,fp8,fp8,0,0.21017066637674967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,float16,0,0.22906132539113364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,0,1,fp8,fp8,0,0.3835893472035726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,fp8,0,0.23254932959874472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,0,1,fp8,fp8,0,0.21278933684031168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,float16,0,0.2364906668663025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,fp8,0,0.2403093377749125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,float16,0,0.14659733573595682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,fp8,0,0.15005333224932352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,0,1,fp8,fp8,0,0.13994133472442627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,float16,0,0.13685333728790283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,fp8,0,0.13267200191815695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,0,1,fp8,fp8,0,0.12392533818880717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,float16,0,0.1325279970963796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,fp8,0,0.13562666376431784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,0,1,fp8,fp8,0,0.1244053343931834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,float16,0,0.1367946664492289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,fp8,0,0.14055466651916504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,0,1,fp8,fp8,0,0.12598933776219687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,float16,0,0.1422826647758484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,fp8,0,0.14056533575057983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,0,1,fp8,fp8,0,0.21855467557907104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,float16,0,0.09442666172981262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,fp8,0,0.09678933024406433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,0,1,fp8,fp8,0,0.09248000383377075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,float16,0,0.09169066945711772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,fp8,0,0.09330133597056071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,0,1,fp8,fp8,0,0.0883840024471283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,float16,0,0.09379200140635173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,0,1,fp8,fp8,0,0.08803733189900716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,float16,0,0.09320533275604248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,fp8,0,0.09343467156092326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,0,1,fp8,fp8,0,0.08943466345469157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,float16,0,0.0936906635761261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,fp8,0,0.09442666172981262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,0,1,fp8,fp8,0,0.08898666501045227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,0,1,fp8,fp8,0,0.13267200191815695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,float16,0,1.556378682454427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,fp8,0,0.09330667058626811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,0,1,fp8,fp8,0,1.4314826329549153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,float16,0,1.5756373405456543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,fp8,0,1.608682632446289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,0,1,fp8,fp8,0,1.4492319424947102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,fp8,0,1.5729866027832031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,float16,0,1.6321226755777996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,float16,0,1.6565546989440918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,fp8,0,1.7016480763753254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,0,1,fp8,fp8,0,1.5525120099385579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,float16,0,0.8898613452911377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,fp8,0,0.899178663889567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,0,1,fp8,fp8,0,0.8527786731719971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,float16,0,0.7918132940928141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,fp8,0,0.8061493237813314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,0,1,fp8,fp8,0,0.7318933010101318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,float16,0,0.8063360055287679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,fp8,0,0.7991466522216797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,0,1,fp8,fp8,0,0.7389547030131022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,float16,0,0.8838666280110677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,fp8,0,0.8308640321095785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,0,1,fp8,fp8,0,0.752837340037028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,float16,0,0.8470613161722819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,fp8,0,0.8532000382741293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,0,1,fp8,fp8,0,0.7875999609629313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,float16,0,0.4675840139389038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,fp8,0,0.47723201910654706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,0,1,fp8,fp8,0,0.43570133050282794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,float16,0,0.42159998416900635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,fp8,0,1.6371839841206868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,0,1,fp8,fp8,0,0.38598934809366864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,float16,0,0.4297066529591878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,fp8,0,0.42956264813741046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,0,1,fp8,fp8,0,0.38780800501505536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,float16,0,0.43166399002075195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,fp8,0,0.43115198612213135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,0,1,fp8,fp8,0,0.3967466751734416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,float16,0,0.44735467433929443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,fp8,0,0.45551466941833496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,fp8,0,0.4203039805094401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,float16,0,0.25909332434336346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,fp8,0,0.25919467210769653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,0,1,fp8,fp8,0,0.2397813399632772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,float16,0,0.22423466046651205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,fp8,0,0.2319200038909912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,0,1,fp8,fp8,0,0.21361066897710165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,float16,0,0.2339093287785848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,fp8,0,0.23010667165120444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,0,1,fp8,fp8,0,0.21488000949223837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,float16,0,0.23460266987482706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,fp8,0,0.24130133787790933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,0,1,fp8,fp8,0,0.21837333838144937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,0,1,fp8,fp8,0,1.478549321492513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,float16,0,0.24899200598398843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,0,1,fp8,fp8,0,0.4123893181482951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,0,1,fp8,fp8,0,0.22669865687688193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,fp8,0,0.14907733599344888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,0,1,fp8,fp8,0,0.14019200205802917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,float16,0,0.12616533041000366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,fp8,0,0.12873066465059915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,0,1,fp8,fp8,0,0.12276267011960347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,float16,0,0.13032000263532004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,fp8,0,0.13020267089207968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,0,1,fp8,fp8,0,0.12171733379364014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,fp8,0,0.2424160043398539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,float16,0,0.13277332981427512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,fp8,0,0.13268267114957175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,0,1,fp8,fp8,0,0.12713066736857095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,float16,0,0.13734400272369385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,fp8,0,0.14031466841697693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,0,1,fp8,fp8,0,0.13397333025932312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,float16,0,0.08902933200200398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,fp8,0,0.09041600426038106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,0,1,fp8,fp8,0,0.08682133754094441
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,float16,0,0.0849173367023468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,fp8,0,0.08477866649627686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,0,1,fp8,fp8,0,0.08065600196520488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,float16,0,0.0849226713180542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,fp8,0,0.086325337489446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,0,1,fp8,fp8,0,0.08054399987061818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,float16,0,0.08470400174458821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,fp8,0,0.08678399523099263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,0,1,fp8,fp8,0,0.08069866895675659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,float16,0,0.08613866567611694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,fp8,0,0.08688533306121826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,0,1,fp8,fp8,0,0.08225599924723308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,float16,0,0.06032533446947733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,fp8,0,0.06126933296521505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,float16,0,0.05820799867312113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,fp8,0,0.059157331784566246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,0,1,fp8,fp8,0,0.05599466462930044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,float16,0,0.05866666634877523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,0,1,fp8,fp8,0,0.056618665655454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,float16,0,0.059061333537101746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,float16,0,0.1486026644706726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,fp8,0,0.060640002290407814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,0,1,fp8,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,float16,0,0.06048533320426941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,fp8,0,0.06010133524735769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,0,1,fp8,fp8,0,0.058559998869895935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,0,1,fp8,fp8,0,0.05835199852784475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,float16,0,1.069754679997762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,fp8,0,1.043183962504069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,0,1,fp8,fp8,0,0.9642186959584554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,float16,0,1.074938694636027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,fp8,0,1.0605226357777913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,float16,0,1.0854453245798747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,fp8,0,1.078661362330119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,fp8,0,0.059690664211908974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,0,1,fp8,fp8,0,0.9990613460540771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,float16,0,1.136853297551473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,fp8,0,1.1328799724578857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,0,1,fp8,fp8,0,0.9792906443277994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,float16,0,0.6227840185165405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,fp8,0,0.6187680164972941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,0,1,fp8,fp8,0,0.5747893253962199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,float16,0,0.5397066672643026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,fp8,0,0.5398773352305094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,0,1,fp8,fp8,0,0.4960906505584717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,float16,0,0.5450453360875448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,fp8,0,0.545632004737854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,float16,0,0.55348801612854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,fp8,0,0.5570666790008545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,0,1,fp8,fp8,0,1.0484693050384521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,float16,0,0.5710560083389282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,fp8,0,0.5892373323440552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,0,1,fp8,fp8,0,0.5351306597391764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,float16,0,0.33059199651082355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,fp8,0,0.3301546573638916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,0,1,fp8,fp8,0,0.3055359919865926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,float16,0,0.299125333627065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,fp8,0,0.2922666668891907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,0,1,fp8,fp8,0,0.5136906703313192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,float16,0,0.29579732815424603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,fp8,0,0.2924000024795532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,0,1,fp8,fp8,0,0.27058666944503784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,float16,0,0.2933280070622762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,fp8,0,0.30219199260075885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,0,1,fp8,fp8,0,0.2749066750208537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,0,1,fp8,fp8,0,0.5040853420893351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,fp8,0,0.3102666735649109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,0,1,fp8,fp8,0,0.285258670647939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,float16,0,0.17939200003941855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,fp8,0,0.18268799781799316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,0,1,fp8,fp8,0,0.2678613265355428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,float16,0,0.15502933661142984
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,fp8,0,0.15563199917475382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,0,1,fp8,fp8,0,0.15031466881434122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,float16,0,0.15653866529464722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,fp8,0,0.1600106656551361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,0,1,fp8,fp8,0,0.15049599607785544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,float16,0,0.1634666621685028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,fp8,0,0.16524266203244528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,0,1,fp8,fp8,0,0.15458133816719055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,float16,0,0.16977065801620483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,fp8,0,0.17244267463684082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,0,1,fp8,fp8,0,0.15848533312479654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,float16,0,0.10408000151316325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,fp8,0,0.10533866286277771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,0,1,fp8,fp8,0,0.17056000232696533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,0,1,fp8,fp8,0,0.10152000188827515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,float16,0,0.09300266702969869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,fp8,0,0.09596266349156697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,0,1,fp8,fp8,0,0.08709866801897685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,float16,0,0.09273067116737366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,fp8,0,0.09486400087674458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,0,1,fp8,fp8,0,0.08703466256459554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,float16,0,0.09485866626103719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,fp8,0,0.09529067079226176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,0,1,fp8,fp8,0,0.08888000249862671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,float16,0,0.09751466910044353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,fp8,0,0.09733866651852925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,0,1,fp8,fp8,0,0.09523733456929524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,float16,0,0.06453333298365276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,fp8,0,0.06660800178845723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,0,1,fp8,fp8,0,0.0632479985555013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,float16,0,0.06259199976921082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,fp8,0,0.06410133341948192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,0,1,fp8,fp8,0,0.058261334896087646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,float16,0,0.06422933439413707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,fp8,0,0.06465599934260051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,0,1,fp8,fp8,0,0.059845333298047386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,float16,0,0.06413333117961884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,fp8,0,0.06445866823196411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,0,1,fp8,fp8,0,0.06026133398214976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,float16,0,0.06407466530799866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,fp8,0,0.06474666794141133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,0,1,fp8,fp8,0,0.06061333417892456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,float16,0,0.04811733464399973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,fp8,0,0.05013333261013031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,0,1,fp8,fp8,0,0.048138668139775596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,float16,0,0.0480320006608963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,fp8,0,0.04775999983151754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,float16,0,0.047744000951449074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,fp8,0,0.04821333289146423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,0,1,fp8,fp8,0,0.04571199913819631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,float16,0,0.04781333108743032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,fp8,0,0.04799999793370565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,0,1,fp8,fp8,0,0.04601066807905833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,float16,0,0.04771199822425842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,fp8,0,0.04798933366934458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,0,1,fp8,fp8,0,0.048021331429481506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,float16,0,0.3105226755142212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,float16,0,1.0957067012786865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,fp8,0,1.0955626964569092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,0,1,fp8,fp8,0,1.0823840300242107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,0,1,fp8,fp8,0,0.047338664531707764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,float16,0,1.1170026461283367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,fp8,0,1.1054666837056477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,float16,0,1.172544002532959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,fp8,0,1.1457706292470295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,0,1,fp8,fp8,0,1.2893493175506592
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,float16,0,1.185381333033244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,fp8,0,1.1592106819152832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,float16,0,0.649237314860026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,0,1,fp8,fp8,0,1.1978453000386555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,fp8,0,0.6471946636835734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,0,1,fp8,fp8,0,0.6473919947942098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,float16,0,0.5637333393096924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,fp8,0,0.5690079927444458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,0,1,fp8,fp8,0,0.5529546737670898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,float16,0,0.5721013148625692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,fp8,0,0.5678079922993978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,0,1,fp8,fp8,0,0.5582720041275024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,float16,0,0.5943466822306315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,0,1,fp8,fp8,0,1.0922026634216309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,0,1,fp8,fp8,0,0.6304800113042196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,float16,0,0.6019413471221924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,fp8,0,0.6055786609649658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,0,1,fp8,fp8,0,0.6809919675191244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,fp8,0,0.33772265911102295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,0,1,fp8,fp8,0,0.33777066071828205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,float16,0,0.3014133373896281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,fp8,0,0.2964213291803996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,0,1,fp8,fp8,0,0.2885333299636841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,float16,0,0.3028159936269124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,fp8,0,0.3027199904123942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,0,1,fp8,fp8,0,0.2909066677093506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,float16,0,0.31842132409413654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,fp8,0,0.3113706707954407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,0,1,fp8,fp8,0,0.3105280001958211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,float16,0,0.3193066716194153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,fp8,0,0.3179413278897603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,0,1,fp8,fp8,0,0.31272000074386597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,float16,0,0.18691200017929077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,fp8,0,0.18251200517018637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,0,1,fp8,fp8,0,0.18274666865666708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,float16,0,0.1611786683400472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,fp8,0,0.16222400466601053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,0,1,fp8,fp8,0,0.15366933743158975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,float16,0,0.16607999801635742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,fp8,0,0.16475199659665427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,0,1,fp8,fp8,0,0.15666666626930237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,float16,0,0.17204266786575317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,fp8,0,0.1721973419189453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,0,1,fp8,fp8,0,0.16660267114639282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,float16,0,0.17850132783253989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,fp8,0,0.1723946730295817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,0,1,fp8,fp8,0,0.16897066434224448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,fp8,0,0.5881653229395548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,fp8,0,0.10549333691596985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,0,1,fp8,fp8,0,0.10452266534169515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,float16,0,0.08936533331871033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,fp8,0,0.08940800031026204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,0,1,fp8,fp8,0,0.0869760016600291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,float16,0,0.09111467003822327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,fp8,0,0.09109866619110107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,0,1,fp8,fp8,0,0.08700799942016602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,float16,0,0.09356799721717834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,float16,0,0.33747732639312744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,fp8,0,0.09488532940546672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,0,1,fp8,fp8,0,0.09545600414276123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,float16,0,0.09751466910044353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,fp8,0,0.09731733798980713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,0,1,fp8,fp8,0,0.09755200147628784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,float16,0,0.059301331639289856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,0,1,fp8,fp8,0,0.06091733276844025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,float16,0,0.05704000095526377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,fp8,0,0.05579199890295664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,0,1,fp8,fp8,0,0.05449600021044413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,fp8,0,0.057189335425694786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,0,1,fp8,fp8,0,0.054666668176651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,float16,0,0.05914133290449778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,fp8,0,0.05845333139101664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,0,1,fp8,fp8,0,0.05663999915122986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,float16,0,0.058778668443361916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,fp8,0,0.05831466615200043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,float16,0,0.05670933425426483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,float16,0,0.03946666667858759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,fp8,0,0.03962666789690653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,0,1,fp8,fp8,0,0.03996799886226654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,float16,0,0.037621334195137024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,fp8,0,0.03745066622893015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,fp8,0,0.060090666015942894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,0,1,fp8,fp8,0,0.0358240008354187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,float16,0,0.03765333443880081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,0,1,fp8,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,float16,0,0.03794133414824804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,0,1,fp8,fp8,0,0.039034667114416756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,float16,0,0.0395413339138031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,0,1,fp8,fp8,0,0.0377813329299291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,fp8,0,0.032298666735490165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,0,1,fp8,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,0,1,fp8,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,float16,0,0.10733333230018616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,0,1,fp8,fp8,0,0.05853866537412008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,0,1,fp8,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,0,1,fp8,fp8,0,0.03281066566705704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,float16,0,0.03334933271010717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,0,1,fp8,fp8,0,0.03294933338960012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,float16,0,0.0317546675602595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,float16,0,0.031744000812371574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,0,1,float16,fp8,0,0.9298773606618246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,0,1,fp8,fp8,0,0.9253599643707275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,0,1,float16,float16,0,0.9503839810689291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,0,1,float16,fp8,0,0.9375680287679037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,0,1,fp8,fp8,0,0.9341493447621664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,0,1,float16,float16,0,1.014682690302531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,0,1,float16,fp8,0,0.980288028717041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,0,1,fp8,fp8,0,1.1272532939910889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,0,1,float16,float16,0,1.0059359868367512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,0,1,float16,fp8,0,0.9793919722239176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,0,1,fp8,fp8,0,1.0578293005625408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,0,1,float16,fp8,0,0.5571200052897135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,0,1,fp8,fp8,0,0.5677546660105387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,0,1,float16,float16,0,0.9310613473256429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,0,1,float16,fp8,0,0.4818720022837321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,0,1,fp8,fp8,0,0.47574933369954425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,0,1,float16,float16,0,0.4919039805730184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,0,1,float16,fp8,0,0.4847253163655599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,0,1,float16,float16,0,0.5664533376693726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,0,1,fp8,fp8,0,0.47815465927124023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,0,1,float16,float16,0,0.5108906825383505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,0,1,float16,fp8,0,0.5019040107727051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,0,1,float16,float16,0,0.47889065742492676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,0,1,float16,float16,0,0.517845352490743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,0,1,float16,fp8,0,0.5074453353881836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,0,1,fp8,fp8,0,0.5196693340937296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,0,1,float16,fp8,0,0.29156267642974854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,0,1,fp8,fp8,0,0.294869323571523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,0,1,float16,float16,0,0.2520959973335266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,0,1,float16,fp8,0,0.2498826583226522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,0,1,fp8,fp8,0,0.24512000878651938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,0,1,float16,float16,0,0.25703465938568115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,0,1,float16,fp8,0,0.2549546758333842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,0,1,fp8,fp8,0,0.24921600023905435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,0,1,float16,float16,0,0.2692213257153829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,0,1,float16,fp8,0,0.2646080056826274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,0,1,float16,float16,0,0.2952853242556254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,0,1,fp8,fp8,0,0.2711679935455322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,0,1,float16,fp8,0,0.26740266879399616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,0,1,fp8,fp8,0,0.27134400606155396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,0,1,float16,float16,0,0.1597493290901184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,0,1,fp8,fp8,0,0.5508319934209188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,0,1,fp8,fp8,0,0.16057599584261575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,0,1,float16,float16,0,0.13657599687576294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,0,1,float16,float16,0,0.2720426718393962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,0,1,float16,fp8,0,0.13499733805656433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,0,1,fp8,fp8,0,0.13077867031097412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,0,1,float16,float16,0,0.1397173305352529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,0,1,float16,fp8,0,0.1365653375784556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,0,1,fp8,fp8,0,0.13294399778048197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,0,1,float16,float16,0,0.14408533771832785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,0,1,float16,fp8,0,0.1446560025215149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,0,1,fp8,fp8,0,0.14754133423169455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,0,1,float16,fp8,0,0.1576533317565918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,0,1,float16,fp8,0,0.14616533120473227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,0,1,fp8,fp8,0,0.1467519998550415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,0,1,float16,float16,0,0.09178133805592854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,0,1,float16,fp8,0,0.08948799967765808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,0,1,fp8,fp8,0,0.09156800309816997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,0,1,float16,float16,0,0.0759626676638921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,0,1,fp8,fp8,0,0.07484266658624013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,0,1,float16,float16,0,0.07658666869004567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,0,1,float16,fp8,0,0.07550399998823802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,0,1,fp8,fp8,0,0.07382399837176006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,0,1,float16,fp8,0,0.07972800234953563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,0,1,fp8,fp8,0,0.08231466511885326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,0,1,float16,float16,0,0.1495626668135325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,0,1,float16,float16,0,0.08316266536712646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,0,1,float16,fp8,0,0.08180800080299377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,0,1,fp8,fp8,0,0.08406399687131245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,0,1,float16,float16,0,0.05146666864554087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,0,1,float16,fp8,0,0.050016000866889954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,0,1,fp8,fp8,0,0.05246399839719137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,0,1,float16,float16,0,0.0479360024134318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,0,1,float16,fp8,0,0.04795733094215393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,0,1,float16,float16,0,0.04796266555786133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,0,1,float16,float16,0,0.07869866490364075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,0,1,float16,fp8,0,0.04807466765244802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,0,1,fp8,fp8,0,0.04569066564242045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,0,1,float16,float16,0,0.05031466484069824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,0,1,float16,fp8,0,0.049738665421803795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,0,1,fp8,fp8,0,0.05013866722583771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,0,1,float16,float16,0,0.048469334840774536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,0,1,float16,fp8,0,0.04964800179004669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,0,1,fp8,fp8,0,0.04808000226815542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,0,1,float16,fp8,0,0.033439998825391136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,0,1,fp8,fp8,0,0.03356266766786575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,0,1,float16,float16,0,0.031632001201311745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,0,1,fp8,fp8,0,0.04554666578769684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,0,1,float16,fp8,0,0.03172266731659571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,0,1,fp8,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,0,1,float16,float16,0,0.03179733455181122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,0,1,float16,fp8,0,0.03203733265399933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,0,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,0,1,float16,float16,0,0.03150933235883713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,0,1,float16,fp8,0,0.0317493329445521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,0,1,fp8,fp8,0,0.03179733455181122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,0,1,float16,float16,0,0.03180266668399175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,0,1,float16,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,0,1,fp8,fp8,0,0.033546666304270424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,0,1,float16,float16,0,0.03331733246644338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,0,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,0,1,fp8,fp8,0,0.027765333652496338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,0,1,float16,float16,0,0.025813333690166473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,0,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,0,1,fp8,fp8,0,0.025909334421157837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,0,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,0,1,fp8,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,0,1,float16,float16,0,0.027269333600997925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,0,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,0,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,0,1,float16,float16,0,0.027600000301996868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,0,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,0,1,float16,fp8,0,0.07508799930413564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,0,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,0,1,fp8,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,0,1,float16,float16,0,0.023269332945346832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,0,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,0,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,0,1,float16,float16,0,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,0,1,float16,fp8,0,0.02626666675011317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,0,1,float16,float16,0,0.02756800005833308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,0,1,float16,float16,0,0.02500266581773758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,0,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,0,1,float16,float16,0,0.42987199624379474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,0,1,fp8,fp8,0,0.021781332790851593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,0,1,float16,fp8,0,0.4439520041147868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,0,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,0,1,float16,float16,0,0.4479573170344035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,0,1,float16,fp8,0,0.4344373146692912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,0,1,fp8,fp8,0,0.4610453446706136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,0,1,float16,float16,0,0.46566931406656903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,0,1,float16,fp8,0,0.4606666564941406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,0,1,fp8,fp8,0,0.42707733313242596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,0,1,fp8,fp8,0,0.5504746834437052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,0,1,float16,float16,0,0.4681653181711833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,0,1,float16,fp8,0,0.4601653416951497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,0,1,fp8,fp8,0,0.47820266087849933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,0,1,float16,float16,0,0.2683359980583191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,0,1,float16,fp8,0,0.2633120020230611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,0,1,fp8,fp8,0,0.27768532435099286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,0,1,float16,fp8,0,0.22273067633310953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,0,1,fp8,fp8,0,0.2262453238169352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,0,1,float16,float16,0,0.23180800676345825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,0,1,float16,fp8,0,0.23095999161402384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,0,1,fp8,fp8,0,0.2262293299039205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,0,1,float16,float16,0,0.2452639937400818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,0,1,float16,fp8,0,0.24024534225463867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,0,1,fp8,fp8,0,0.2453920046488444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,0,1,float16,float16,0,0.24928534030914307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,0,1,float16,float16,0,0.22360533475875854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,0,1,float16,float16,0,0.14607999722162882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,0,1,float16,fp8,0,0.14202133814493814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,0,1,fp8,fp8,0,0.14858667055765787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,0,1,float16,float16,0,0.11965333422025044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,0,1,float16,fp8,0,0.11796266833941142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,0,1,fp8,fp8,0,0.11932800213495891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,0,1,float16,float16,0,0.12395733594894409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,0,1,float16,fp8,0,0.1234826644261678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,0,1,fp8,fp8,0,0.12185600399971008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,0,1,float16,float16,0,0.13013866543769836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,0,1,float16,fp8,0,0.12930666406949362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,0,1,fp8,fp8,0,0.13544000188509622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,0,1,float16,fp8,0,0.2443093260129293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,0,1,float16,fp8,0,0.13171199957529703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,0,1,fp8,fp8,0,0.13425599535306296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,0,1,float16,fp8,0,0.08475733796755473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,0,1,fp8,fp8,0,0.0860746701558431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,0,1,float16,float16,0,0.06937066713968913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,0,1,float16,fp8,0,0.06860266625881195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,0,1,fp8,fp8,0,0.06607466439406078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,0,1,float16,float16,0,0.06964266796906789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,0,1,float16,float16,0,0.13573867082595825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,0,1,fp8,fp8,0,0.06821866830190022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,0,1,float16,float16,0,0.0858133335908254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,0,1,float16,float16,0,0.07438933352629344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,0,1,float16,fp8,0,0.07282133400440216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,0,1,fp8,fp8,0,0.07620266576608022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,0,1,float16,float16,0,0.07516799867153168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,0,1,float16,fp8,0,0.07308266560236613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,0,1,fp8,fp8,0,0.07647466659545898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,0,1,float16,float16,0,0.04403733213742574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,0,1,float16,fp8,0,0.04393066465854645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,0,1,fp8,fp8,0,0.04784533381462097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,0,1,float16,float16,0,0.04118400067090988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,0,1,float16,fp8,0,0.0415040006240209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,0,1,fp8,fp8,0,0.039808000127474465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,0,1,float16,float16,0,0.04156800111134847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,0,1,float16,fp8,0,0.04158399999141693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,0,1,fp8,fp8,0,0.0395413339138031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,0,1,float16,float16,0,0.043605332573254905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,0,1,float16,fp8,0,0.043653334180514015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,0,1,fp8,fp8,0,0.04374399781227112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,0,1,float16,float16,0,0.04354133208592733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,0,1,float16,fp8,0,0.04375466704368591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,0,1,fp8,fp8,0,0.044165333112080894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,0,1,float16,fp8,0,0.030826665461063385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,0,1,fp8,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,0,1,float16,float16,0,0.02956799914439519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,0,1,float16,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,0,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,0,1,float16,float16,0,0.029498666524887085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,0,1,float16,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,0,1,fp8,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,0,1,float16,fp8,0,0.02977066735426585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,0,1,float16,fp8,0,0.06975999971230824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,0,1,float16,float16,0,0.02938133229811986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,0,1,float16,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,0,1,fp8,fp8,0,0.2497439980506897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,0,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,0,1,float16,float16,0,0.023599999646345775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,0,1,fp8,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,0,1,float16,float16,0,0.023728000621000927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,0,1,float16,fp8,0,0.02382933348417282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,0,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,0,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,0,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,0,1,float16,float16,0,0.025098666548728943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,0,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,0,1,fp8,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,0,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,0,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,0,1,fp8,fp8,0,0.029557332396507263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,0,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,0,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,0,1,fp8,fp8,0,0.01977066695690155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,0,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,0,1,float16,fp8,0,0.020442667106787365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,0,1,fp8,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,0,1,float16,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,0,1,float16,float16,0,0.020031999796628952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,0,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,0,1,float16,float16,0,0.019498666127522785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,0,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,0,1,float16,float16,0,0.2355519930521647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,0,1,float16,fp8,0,0.23549334208170572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,0,1,fp8,fp8,0,0.24121065934499106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,0,1,float16,float16,0,0.24067733685175577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,0,1,float16,fp8,0,0.23695466915766397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,0,1,fp8,fp8,0,0.24342399835586548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,0,1,float16,fp8,0,0.24948267141977945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,0,1,fp8,fp8,0,0.2619199951489766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,0,1,float16,float16,0,0.2605546712875366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,0,1,float16,fp8,0,0.2513173421223958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,0,1,fp8,fp8,0,0.2652906576792399
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,0,1,float16,fp8,0,0.14775466918945312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,0,1,fp8,fp8,0,0.1546453336874644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,0,1,float16,float16,0,0.12577600280443826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,0,1,float16,fp8,0,0.12564266721407572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,0,1,fp8,fp8,0,0.12801600495974222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,0,1,float16,float16,0,0.1302079955736796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,0,1,float16,fp8,0,0.12820266683896384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,0,1,fp8,fp8,0,0.12987732887268066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,0,1,float16,float16,0,0.25734933217366535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,0,1,float16,fp8,0,0.1359999974568685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,0,1,fp8,fp8,0,0.14321066935857138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,0,1,float16,float16,0,0.1402613321940104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,0,1,float16,fp8,0,0.137061337629954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,0,1,fp8,fp8,0,0.14201600352923074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,0,1,float16,float16,0,0.08549867073694865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,0,1,fp8,fp8,0,0.08715732892354329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,0,1,float16,float16,0,0.07034666836261749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,0,1,float16,fp8,0,0.06891733407974243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,0,1,float16,float16,0,0.13852266470591226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,0,1,fp8,fp8,0,0.06871999800205231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,0,1,float16,float16,0,0.07105599840482076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,0,1,float16,fp8,0,0.07083199918270111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,0,1,float16,float16,0,0.15067199865976968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,0,1,float16,float16,0,0.0748586654663086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,0,1,float16,fp8,0,0.07477866609891255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,0,1,fp8,fp8,0,0.07680533329645793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,0,1,float16,float16,0,0.07890133559703827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,0,1,float16,fp8,0,0.07521600027879079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,0,1,fp8,fp8,0,0.08119999865690868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,0,1,float16,float16,0,0.046122665206591286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,0,1,float16,fp8,0,0.046538665890693665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,0,1,fp8,fp8,0,0.0487306664387385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,0,1,float16,float16,0,0.04379733403523763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,0,1,float16,fp8,0,0.04404266675313314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,0,1,fp8,fp8,0,0.04358933369318644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,0,1,float16,float16,0,0.04373333354791006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,0,1,float16,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,0,1,fp8,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,0,1,float16,float16,0,0.04572799801826477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,0,1,float16,fp8,0,0.04414933423201243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,0,1,fp8,fp8,0,0.045824001232783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,0,1,float16,float16,0,0.044079999128977455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,0,1,float16,fp8,0,0.045237332582473755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,0,1,float16,fp8,0,0.08365333080291748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,0,1,float16,float16,0,0.031386665999889374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,0,1,float16,fp8,0,0.030981334547201794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,0,1,fp8,fp8,0,0.03196800003449122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,0,1,float16,float16,0,0.029152000943819683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,0,1,float16,fp8,0,0.029813334345817566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,0,1,fp8,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,0,1,float16,float16,0,0.02979733298222224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,0,1,float16,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,0,1,fp8,fp8,0,0.029802667597929638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,0,1,float16,float16,0,0.030192000170548756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,0,1,fp8,fp8,0,0.029951999584833782
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,0,1,float16,float16,0,0.031317333380381264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,0,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,0,1,fp8,fp8,0,0.031930667658646904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,0,1,float16,float16,0,0.02147199958562851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,0,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,0,1,float16,float16,0,0.021386665602525074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,0,1,float16,fp8,0,0.02090666691462199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,0,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,0,1,fp8,fp8,0,0.07066133121649425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,0,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,0,1,fp8,fp8,0,0.045797333121299744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,0,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,0,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,0,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,0,1,float16,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,0,1,float16,fp8,0,0.02242133269707362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,0,1,float16,float16,0,0.018917333334684372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,0,1,float16,fp8,0,0.019866666446129482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,0,1,float16,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,0,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,0,1,float16,float16,0,0.16024000446001688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,0,1,float16,fp8,0,0.16028799613316855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,0,1,fp8,fp8,0,0.16243732968966165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,0,1,fp8,fp8,0,0.017690667261679966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,0,1,float16,fp8,0,0.15964800119400024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,0,1,fp8,fp8,0,0.16170666615168253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,0,1,float16,float16,0,0.16736000776290894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,0,1,float16,fp8,0,0.1678559978802999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,0,1,fp8,fp8,0,0.17008533080418906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,0,1,float16,float16,0,0.16841065883636475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,0,1,float16,fp8,0,0.1684373418490092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,0,1,fp8,fp8,0,0.1738133430480957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,0,1,float16,fp8,0,0.10012267033259074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,0,1,fp8,fp8,0,0.10335466265678406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,0,1,float16,float16,0,0.08899199962615967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,0,1,float16,fp8,0,0.08897599577903748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,0,1,fp8,fp8,0,0.087226668993632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,0,1,float16,float16,0,0.08931199709574382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,0,1,float16,fp8,0,0.08918399612108867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,0,1,fp8,fp8,0,0.08676266670227051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,0,1,float16,float16,0,0.09347732861836751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,0,1,float16,fp8,0,0.09181333581606548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,0,1,fp8,fp8,0,0.0944160024325053
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,0,1,float16,float16,0,0.09361599882443745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,0,1,float16,fp8,0,0.09345066547393799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,0,1,fp8,fp8,0,0.09502933422724406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,0,1,float16,fp8,0,0.054229333996772766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,0,1,fp8,fp8,0,0.056474665800730385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,0,1,float16,float16,0,0.050255998969078064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,0,1,float16,fp8,0,0.05009600023428599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,0,1,float16,float16,0,0.1002453366915385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,0,1,fp8,fp8,0,0.05022400120894114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,0,1,float16,float16,0,0.05242133140563965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,0,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,0,1,fp8,fp8,0,0.05003199974695841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,0,1,float16,float16,0,0.052058666944503784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,0,1,float16,fp8,0,0.05249600112438202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,0,1,fp8,fp8,0,0.05231999854246775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,0,1,float16,float16,0,0.05377600093682607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,0,1,float16,fp8,0,0.05184000233809153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,0,1,fp8,fp8,0,0.0537120004494985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,0,1,float16,float16,0,0.035391998787721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,0,1,float16,fp8,0,0.037151999771595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,0,1,fp8,fp8,0,0.036464000741640724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,0,1,float16,float16,0,0.03368533402681351
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,0,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,0,1,float16,float16,0,0.16293332974116007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,0,1,fp8,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,0,1,float16,float16,0,0.03504000107447306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,0,1,float16,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,0,1,fp8,fp8,0,0.03387733300526937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,0,1,float16,float16,0,0.03585600107908249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,0,1,float16,fp8,0,0.03587199995915095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,0,1,fp8,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,0,1,float16,float16,0,0.03581333408753077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,0,1,float16,fp8,0,0.035946667194366455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,0,1,fp8,fp8,0,0.03757333258787791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,0,1,float16,float16,0,0.02515200028816859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,0,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,0,1,fp8,fp8,0,0.02447466552257538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,0,1,float16,float16,0,0.023525332411130268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,0,1,float16,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,0,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,0,1,float16,float16,0,0.025562666356563568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,0,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,0,1,fp8,fp8,0,0.02499733368555705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,0,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,0,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,0,1,fp8,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,0,1,float16,float16,0,0.05403733253479004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,0,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,0,1,fp8,fp8,0,0.023711999257405598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,0,1,fp8,fp8,0,0.018677332748969395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,0,1,float16,float16,0,0.01747200017174085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,0,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,0,1,float16,float16,0,0.018624000251293182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,0,1,fp8,fp8,0,0.018181333939234417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,0,1,fp8,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,0,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,0,1,float16,float16,0,0.024127999941507976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,0,1,float16,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,0,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,0,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,0,1,float16,fp8,0,0.016597333053747814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,0,1,float16,fp8,0,0.016074666132529575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,0,1,float16,float16,0,0.019472000499566395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,0,1,float16,fp8,0,0.017781333376963932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,0,1,fp8,fp8,0,0.015813333292802174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,0,1,float16,float16,0,0.015493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,0,1,float16,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,0,1,fp8,fp8,0,0.015717333803574245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,0,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,0,1,float16,fp8,0,0.12005866567293803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,0,1,fp8,fp8,0,0.11997866630554199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,0,1,float16,float16,0,0.12427199880282085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,0,1,float16,fp8,0,0.017765333255132038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,0,1,float16,fp8,0,0.12197333574295044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,0,1,fp8,fp8,0,0.1221386690934499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,0,1,float16,float16,0,0.1258026659488678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,0,1,float16,fp8,0,0.12599999705950418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,0,1,fp8,fp8,0,0.127018670241038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,0,1,float16,float16,0,0.12619733810424805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,0,1,fp8,fp8,0,0.1301706631978353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,0,1,float16,float16,0,0.015658666690190632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,0,1,float16,float16,0,0.07252266506354015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,0,1,float16,fp8,0,0.07181333502133687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,0,1,float16,float16,0,0.12194666266441345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,0,1,fp8,fp8,0,0.0743999977906545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,0,1,float16,float16,0,0.06860266625881195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,0,1,float16,fp8,0,0.06869866450627644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,0,1,fp8,fp8,0,0.06625600159168243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,0,1,float16,fp8,0,0.12549333771069845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,0,1,float16,fp8,0,0.06864533325036366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,0,1,fp8,fp8,0,0.06630933284759521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,0,1,float16,float16,0,0.07003200054168701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,0,1,float16,fp8,0,0.06857599814732869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,0,1,fp8,fp8,0,0.07057066758473714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,0,1,float16,float16,0,0.07015466690063477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,0,1,float16,fp8,0,0.07022933165232341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,0,1,fp8,fp8,0,0.07072533170382182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,0,1,float16,float16,0,0.043562665581703186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,0,1,float16,fp8,0,0.04398400088151296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,0,1,fp8,fp8,0,0.044069334864616394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,0,1,float16,float16,0,0.0418453315893809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,0,1,float16,fp8,0,0.042223999897638954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,0,1,fp8,fp8,0,0.04182933270931244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,0,1,float16,float16,0,0.04363200068473816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,0,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,0,1,fp8,fp8,0,0.04165866722663244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,0,1,float16,fp8,0,0.04181866844495138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,0,1,fp8,fp8,0,0.04404266675313314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,0,1,float16,float16,0,0.04385599990685781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,0,1,float16,fp8,0,0.043552001317342125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,0,1,fp8,fp8,0,0.043578664461771645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,0,1,float16,float16,0,0.02940800040960312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,0,1,float16,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,0,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,0,1,float16,float16,0,0.027632000545660656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,0,1,float16,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,0,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,0,1,float16,float16,0,0.06878933310508728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,0,1,float16,fp8,0,0.028757333755493164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,0,1,fp8,fp8,0,0.02773333340883255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,0,1,float16,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,0,1,float16,float16,0,0.04261866708596548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,0,1,fp8,fp8,0,0.029658667743206024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,0,1,float16,float16,0,0.03025600065787633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,0,1,fp8,fp8,0,0.02996266633272171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,0,1,float16,float16,0,0.021727999051411945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,0,1,float16,fp8,0,0.022053333620230358
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,0,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,0,1,float16,float16,0,0.02033599962790807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,0,1,float16,float16,0,0.029552000264326733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,0,1,float16,float16,0,0.021568000316619873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,0,1,float16,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,0,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,0,1,float16,float16,0,0.02086399992307027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,0,1,float16,fp8,0,0.021727999051411945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,0,1,float16,float16,0,0.021221332252025604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,0,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,0,1,float16,fp8,0,0.03018666555484136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,0,1,float16,float16,0,0.017525333911180496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,0,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,0,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,0,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,0,1,float16,float16,0,0.10328533252080281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,0,1,float16,fp8,0,0.10353599985440572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,0,1,fp8,fp8,0,0.10287466645240784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,0,1,float16,float16,0,0.10357866684595744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,0,1,fp8,fp8,0,0.016149333367745083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,0,1,fp8,fp8,0,0.10161599516868591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,0,1,float16,float16,0,0.10549867153167725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,0,1,float16,fp8,0,0.10341333349545796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,0,1,fp8,fp8,0,0.10534399747848511
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,0,1,float16,float16,0,0.10531733433405559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,0,1,float16,fp8,0,0.10357333223025005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,0,1,fp8,fp8,0,0.1056160032749176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,0,1,float16,fp8,0,0.060693333546320595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,0,1,fp8,fp8,0,0.062165334820747375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,0,1,float16,float16,0,0.06044266621271769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,0,1,float16,fp8,0,0.05992533266544342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,0,1,float16,fp8,0,0.10357333223025005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,0,1,float16,float16,0,0.060415998101234436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,0,1,float16,fp8,0,0.06033066908518473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,0,1,fp8,fp8,0,0.06029866635799408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,0,1,float16,float16,0,0.06002666552861532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,0,1,float16,fp8,0,0.06039466460545858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,0,1,fp8,fp8,0,0.06029866635799408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,0,1,float16,float16,0,0.06201066573460897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,0,1,float16,fp8,0,0.06201600035031637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,0,1,fp8,fp8,0,0.06190933287143707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,0,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,0,1,fp8,fp8,0,0.03763733307520548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,0,1,fp8,fp8,0,0.06033066908518473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,0,1,float16,float16,0,0.03585600107908249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,0,1,float16,fp8,0,0.03737599899371465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,0,1,fp8,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,0,1,float16,float16,0,0.037690666814645134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,0,1,float16,fp8,0,0.037632000943024956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,0,1,fp8,fp8,0,0.03579733272393545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,0,1,float16,float16,0,0.0376800000667572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,0,1,float16,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,0,1,float16,float16,0,0.06043200194835663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,0,1,float16,float16,0,0.037989333271980286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,0,1,float16,fp8,0,0.037674665451049805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,0,1,fp8,fp8,0,0.037952000896135964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,0,1,float16,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,0,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,0,1,float16,float16,0,0.025637333591779072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,0,1,float16,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,0,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,0,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,0,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,0,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,0,1,fp8,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,0,1,float16,float16,0,0.026416001220544178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,0,1,float16,fp8,0,0.027034667630990345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,0,1,fp8,fp8,0,0.026949333647886913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,0,1,float16,float16,0,0.020938667158285778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,0,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,0,1,float16,float16,0,0.03760000069936117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,0,1,float16,float16,0,0.02743999908367793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,0,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,0,1,fp8,fp8,0,0.020453333854675293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,0,1,float16,float16,0,0.020031999796628952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,0,1,fp8,fp8,0,0.020549333343903225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,0,1,float16,float16,0,0.020901332298914593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,0,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,0,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,0,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,0,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,0,1,fp8,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,0,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,0,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,0,1,fp8,fp8,0,0.0163680004576842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,0,1,float16,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,0,1,float16,float16,0,0.016309333344300587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,0,1,float16,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,0,1,float16,float16,0,0.016751999656359356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,0,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,0,1,float16,float16,0,0.01588800052801768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,0,1,float16,float16,0,0.015909332782030106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,0,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,0,0.08921066919962566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,128,0,1,fp8,fp8,0,0.08683199683825175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,0,0.08868799606959026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,0,0.08892266949017842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,2,128,0,1,fp8,fp8,0,0.08513599634170532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,0,0.08907199899355571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,0,0.08904000123341878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,0,0.08879466851552327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,0,0.08897067109743755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,8,128,0,1,fp8,fp8,0,0.0869760016600291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,0,0.08876267075538635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,0,0.052000001072883606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,128,0,1,fp8,fp8,0,0.051872000098228455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,0,0.05186666548252106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,0,0.052229334910710655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,1,128,0,1,fp8,fp8,0,0.050255998969078064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,0,0.05217599868774414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,0,0.05188799897829691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,2,128,0,1,fp8,fp8,0,0.0499946673711141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,0,0.05234666665395101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,0,0.05206400156021118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,4,128,0,1,fp8,fp8,0,0.05193600058555603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,0,0.05150933563709259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,0,0.052042668064435325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,8,128,0,1,fp8,fp8,0,0.04985066751639048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,0,0.052000001072883606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,0,0.03377600014209747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,128,0,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,0,0.03459733227888743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,1,128,0,1,fp8,fp8,0,0.031328000128269196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,0,0.033258666594823204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,2,128,0,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,0,0.033674667278925575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,0,0.03338133295377096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,4,128,0,1,fp8,fp8,0,0.03338133295377096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,16,4,128,0,1,fp8,fp8,0,0.08522133032480876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,128,0,1,fp8,fp8,0,0.031856000423431396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,0,0.02386666586001714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,0,0.0322826678554217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,1,128,0,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,0,0.02516266703605652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,2,128,0,1,fp8,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,4,128,0,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,16,128,0,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,128,0,1,fp8,fp8,0,0.023567999402681988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,16,128,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,0,0.033333333830038704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,1,128,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,128,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,0,0.023925334215164185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,0,0.020106667031844456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,0,0.021642667551835377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,8,128,0,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,16,128,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,0,0.017583999782800674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,0,0.023018665611743927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,2,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,4,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,8,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,16,128,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,1,128,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,2,128,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,4,128,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,16,8,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,0,0.016399999459584553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,16,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,1,128,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,16,4,128,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,0,0.016410666207472484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,4,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,8,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,16,128,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,1,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,2,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,4,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,0,0.01591466615597407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,16,8,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,0,0.016719999412695568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,16,2,128,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,fp8,0,5.31879456837972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,0,1,fp8,fp8,0,4.268735885620117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,float16,0,5.5492909749348955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,float16,0,5.607168197631836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,fp8,0,5.245669364929199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,0,1,fp8,fp8,0,4.276336034138997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,float16,0,5.748485565185547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,fp8,0,5.68836784362793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,float16,0,2.8273706436157227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,0,1,fp8,fp8,0,4.301173210144043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,fp8,0,2.83187198638916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,float16,0,2.6505759557088218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,fp8,0,2.673167864481608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,0,1,fp8,fp8,0,2.2165493965148926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,0,1,fp8,fp8,0,2.2894879976908364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,float16,0,2.8711414337158203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,fp8,0,2.7290881474812827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,0,1,fp8,fp8,0,2.2187253634134927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,float16,0,2.71341864267985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,float16,0,1.3845760027567546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,0,1,fp8,fp8,0,2.2360852559407554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,0,1,fp8,fp8,0,1.2374719778696697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,fp8,0,1.359114646911621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,float16,0,1.3320799668629963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,fp8,0,2.7649332682291665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,float16,0,1.3636639912923176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,fp8,0,1.3460960388183594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,0,1,fp8,fp8,0,1.2008693218231201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,fp8,0,1.326581319173177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,float16,0,1.3894507090250652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,fp8,0,1.3466933568318684
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,float16,0,0.7784266471862793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,0,1,fp8,fp8,0,1.2074133555094402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,0,1,fp8,fp8,0,0.7090773582458496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,float16,0,0.750170628229777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,0,1,fp8,fp8,0,1.1998186906178792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,fp8,0,0.7520906925201416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,0,1,fp8,fp8,0,0.6910080115000407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,float16,0,0.7597866853078207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,fp8,0,0.7592533429463705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,float16,0,0.7555519739786783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,fp8,0,0.7665386994679769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,0,1,fp8,fp8,0,0.6970240275065104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,fp8,0,0.8681173324584961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,float16,0,3.0694828033447266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,fp8,0,2.883546511332194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,0,1,fp8,fp8,0,2.562986691792806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,0,1,fp8,fp8,0,0.6942506631215414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,float16,0,3.088314692179362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,fp8,0,2.959530512491862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,0,1,fp8,fp8,0,2.57041072845459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,float16,0,3.1710081100463867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,fp8,0,3.1048854192097983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,float16,0,1.5643733342488606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,0,1,fp8,fp8,0,2.602831999460856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,fp8,0,1.5513386726379395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,0,1,fp8,fp8,0,1.402880032857259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,float16,0,1.5047252972920735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,fp8,0,1.4853493372599285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,0,1,fp8,fp8,0,1.3485973676045735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,float16,0,1.4979732831319172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,fp8,0,1.5215306282043457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,float16,0,1.4953866004943848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,fp8,0,1.5356319745381672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,0,1,fp8,fp8,0,1.3620319366455078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,float16,0,0.8414239883422852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,fp8,0,0.8489813009897867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,float16,0,0.8155732949574789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,0,1,fp8,fp8,0,1.3482453028361003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,fp8,0,0.8155360221862793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,0,1,fp8,fp8,0,0.7473813692728678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,float16,0,0.8234453201293945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,fp8,0,0.8234506448109945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,0,1,fp8,fp8,0,0.750053326288859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,float16,0,0.8191359837849935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,0,1,fp8,fp8,0,0.7738773028055826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,fp8,0,0.833184003829956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,0,1,fp8,fp8,0,0.7528106371561686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,float16,0,0.5004426638285319
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,fp8,0,0.5105973482131958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,float16,0,0.47731733322143555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,fp8,0,0.47867735226949054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,0,1,fp8,fp8,0,0.4402666489283244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,float16,0,0.483514666557312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,fp8,0,0.4809066851933797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,0,1,fp8,fp8,0,0.44421335061391193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,float16,0,0.48846932252248126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,fp8,0,0.4811946551005046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,0,1,fp8,fp8,0,0.4495946566263835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,0,1,fp8,fp8,0,0.45744534333546955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,float16,0,2.2884373664855957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,0,1,fp8,fp8,0,1.8740426699320476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,float16,0,2.1158560117085776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,0,1,fp8,fp8,0,1.882032076517741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,fp8,0,2.1285120646158853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,float16,0,2.1652426719665527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,fp8,0,2.210261344909668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,float16,0,1.1958399613698323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,0,1,fp8,fp8,0,1.893712043762207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,fp8,0,2.1744960149129233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,fp8,0,1.1570293108622234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,0,1,fp8,fp8,0,1.0386559963226318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,float16,0,1.0919840335845947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,fp8,0,1.0943946838378906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,0,1,fp8,fp8,0,1.0947306950887044
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,float16,0,1.09879469871521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,fp8,0,1.0948479970296223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,float16,0,1.1148959795633953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,fp8,0,1.1146986484527588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,0,1,fp8,fp8,0,1.0051413377126057
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,float16,0,0.6562399864196777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,fp8,0,0.6338933308919271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,0,1,fp8,fp8,0,0.5794826745986938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,float16,0,0.621669332186381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,fp8,0,0.6046559810638428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,0,1,fp8,fp8,0,1.0047039985656738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,0,1,fp8,fp8,0,0.5601919889450073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,float16,0,0.61681067943573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,fp8,0,0.6202826499938965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,0,1,fp8,fp8,0,0.5600159962972006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,float16,0,0.6165013313293457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,fp8,0,0.6233493487040201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,float16,0,0.3810186783472697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,fp8,0,0.3834559917449951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,0,1,fp8,fp8,0,0.3499679962793986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,float16,0,0.3661760091781616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,fp8,0,0.37029866377512616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,0,1,fp8,fp8,0,0.3370293378829956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,float16,0,0.3691893418629964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,fp8,0,0.36799466609954834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,0,1,fp8,fp8,0,0.5646400054295858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,float16,0,0.36820801099141437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,fp8,0,0.37966398398081463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,0,1,fp8,fp8,0,0.34353065490722656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,0,1,fp8,fp8,0,0.33772265911102295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,float16,0,2.948997179667155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,float16,0,2.7870613733927407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,fp8,0,2.8227628072102866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,0,1,fp8,fp8,0,2.476352055867513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,0,1,fp8,fp8,0,2.48636261622111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,float16,0,2.9930667877197266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,fp8,0,3.0643040339152017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,fp8,0,2.944101333618164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,float16,0,1.5120320320129395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,fp8,0,1.505674680074056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,0,1,fp8,fp8,0,1.355610688527425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,float16,0,1.4135947227478027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,fp8,0,1.4181493123372395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,0,1,fp8,fp8,0,2.51473061243693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,fp8,0,1.4448426564534504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,0,1,fp8,fp8,0,1.286511977513631
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,0,1,fp8,fp8,0,1.281882683436076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,fp8,0,1.4397226969401042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,0,1,fp8,fp8,0,1.3008053302764893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,float16,0,0.8434346516927084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,fp8,0,0.8121493657430013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,float16,0,1.4249812761942546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,0,1,fp8,fp8,0,0.7273279825846354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,float16,0,0.8398400147755941
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,fp8,0,0.7705972989400228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,0,1,fp8,fp8,0,0.6910026868184408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,float16,0,0.7806399663289388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,fp8,0,0.7636906305948893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,float16,0,0.7710560162862142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,fp8,0,0.769765297571818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,0,1,fp8,fp8,0,0.7030560175577799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,float16,0,0.450383981068929
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,float16,0,1.435765266418457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,0,1,fp8,fp8,0,0.4157493511835734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,float16,0,0.43372265497843426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,0,1,fp8,fp8,0,0.693120002746582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,0,1,fp8,fp8,0,0.396234671274821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,float16,0,0.4334026575088501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,fp8,0,0.4402986764907837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,0,1,fp8,fp8,0,0.3985919952392578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,float16,0,0.4382400115331014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,fp8,0,0.4620586633682251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,0,1,fp8,fp8,0,0.40243732929229736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,fp8,0,0.42901333173116046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,fp8,0,0.2781599958737691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,0,1,fp8,fp8,0,0.25707733631134033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,float16,0,0.26631999015808105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,fp8,0,0.269706666469574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,0,1,fp8,fp8,0,0.2468000054359436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,float16,0,0.26359466711680096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,fp8,0,0.26683733860651654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,fp8,0,0.43966933091481525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,0,1,fp8,fp8,0,0.2460426688194275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,float16,0,0.2672693332036336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,float16,0,0.2804906765619914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,0,1,fp8,fp8,0,0.24757333596547446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,float16,0,1.7464693387349446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,0,1,fp8,fp8,0,1.5362292925516765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,fp8,0,0.2703146735827128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,float16,0,1.6933493614196777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,fp8,0,1.7276105880737305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,0,1,fp8,fp8,0,1.5414080619812012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,fp8,0,1.720106601715088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,float16,0,1.791327953338623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,fp8,0,1.7274400393168132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,float16,0,0.9640373388926188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,0,1,fp8,fp8,0,1.563744068145752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,0,1,fp8,fp8,0,0.9836053053538004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,float16,0,0.9096159934997559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,fp8,0,0.889946699142456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,0,1,fp8,fp8,0,0.8054186503092448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,float16,0,0.9139946301778158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,fp8,0,0.8954826990763346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,float16,0,0.9025759696960449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,fp8,0,0.9059999783833822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,0,1,fp8,fp8,0,0.83515731493632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,fp8,0,0.5265066623687744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,0,1,fp8,fp8,0,0.8089280128479004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,0,1,fp8,fp8,0,0.4694026708602905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,fp8,0,0.9416000048319498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,float16,0,0.49274667104085285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,fp8,0,0.477786660194397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,0,1,fp8,fp8,0,0.4434826771418254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,float16,0,0.4827573299407959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,fp8,0,0.4989173412322998
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,0,1,fp8,fp8,0,0.44466666380564374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,float16,0,0.5039680004119873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,fp8,0,0.49164267381032306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,0,1,fp8,fp8,0,0.4495306809743245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,float16,0,0.29782400528589886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,fp8,0,0.3078026572863261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,0,1,fp8,fp8,0,0.27507199843724567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,float16,0,0.285098671913147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,fp8,0,0.2780960003534953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,0,1,fp8,fp8,0,0.25596266984939575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,float16,0,0.2804159919420878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,fp8,0,0.2897333304087321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,0,1,fp8,fp8,0,0.2610773245493571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,float16,0,0.29201066493988037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,0,1,fp8,fp8,0,0.2651306589444478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,float16,0,0.5131626526514689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,fp8,0,0.19149333238601685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,0,1,fp8,fp8,0,0.17898666858673096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,fp8,0,0.18699200948079428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,0,1,fp8,fp8,0,0.17069333791732788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,float16,0,0.18932799498240152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,fp8,0,0.28917332490285236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,fp8,0,0.1881706714630127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,float16,0,0.19373865922292074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,float16,0,0.1878719925880432
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,fp8,0,0.1896160046259562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,0,1,fp8,fp8,0,0.17730667193730673
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,float16,0,0.18467199802398682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,float16,0,1.7532745997111003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,fp8,0,1.7348480224609375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,0,1,fp8,fp8,0,0.1716053287188212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,float16,0,1.7538026173909504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,fp8,0,1.7491893768310547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,0,1,fp8,fp8,0,1.5801119804382324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,0,1,fp8,fp8,0,1.5704320271809895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,float16,0,1.8104640642801921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,fp8,0,1.8562240600585938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,0,1,fp8,fp8,0,1.6108479499816895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,fp8,0,1.0000267028808594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,0,1,fp8,fp8,0,0.880133310953776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,float16,0,0.8865280151367188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,fp8,0,0.8968426386515299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,float16,0,0.8915839989980062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,fp8,0,0.8989493052164713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,float16,0,0.9633386929829916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,0,1,fp8,fp8,0,0.8141333262125651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,float16,0,0.9074719746907552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,0,1,fp8,fp8,0,0.8278400103251139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,float16,0,0.5170186758041382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,fp8,0,0.5148160060246786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,0,1,fp8,fp8,0,0.47035733858744305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,float16,0,0.4734239975611369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,fp8,0,0.4878559907277425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,0,1,fp8,fp8,0,0.43376000722249347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,float16,0,0.488101323445638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,fp8,0,0.4780106544494629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,0,1,fp8,fp8,0,0.4368693431218465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,float16,0,0.48263998826344806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,fp8,0,0.4907519817352295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,0,1,fp8,fp8,0,0.4434826771418254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,float16,0,0.28994667530059814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,fp8,0,0.29179733991622925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,0,1,fp8,fp8,0,0.26658133665720624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,float16,0,0.26414400339126587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,fp8,0,0.9010186990102133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,0,1,fp8,fp8,0,0.24540799856185913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,float16,0,0.2711840073267619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,0,1,fp8,fp8,0,0.8082133134206136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,0,1,fp8,fp8,0,0.24664533138275146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,float16,0,0.27662932872772217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,fp8,0,0.27406932910283405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,0,1,fp8,fp8,0,0.25036267439524335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,float16,0,0.1718453367551168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,fp8,0,0.17665066321690878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,0,1,fp8,fp8,0,0.1628159979979197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,float16,0,0.1665546695391337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,fp8,0,0.26848532756169635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,0,1,fp8,fp8,0,0.14723199605941772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,float16,0,0.16293866435686746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,fp8,0,0.16507200400034586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,0,1,fp8,fp8,0,0.14793599645296732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,float16,0,0.16127467155456543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,fp8,0,0.1667520006497701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,0,1,fp8,fp8,0,0.1511786679426829
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,float16,0,0.11241599917411804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,fp8,0,0.11909866333007812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,0,1,fp8,fp8,0,0.10729599992434184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,float16,0,0.10999466975529988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,fp8,0,0.110944002866745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,0,1,fp8,fp8,0,0.10308266679445903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,float16,0,0.11133866508801778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,fp8,0,0.11090133587519328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,0,1,fp8,fp8,0,0.10389866431554158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,float16,0,0.11111467083295186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,fp8,0,0.2746826608975728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,fp8,0,0.10757333040237427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,0,1,fp8,fp8,0,0.10340266426404317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,float16,0,1.130570650100708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,fp8,0,1.124618689219157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,fp8,0,0.16262933611869812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,0,1,fp8,fp8,0,1.0187359650929768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,float16,0,1.1270079612731934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,fp8,0,1.1326719919840496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,0,1,fp8,fp8,0,1.0265119870503743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,float16,0,1.14137601852417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,fp8,0,1.150053342183431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,float16,0,0.6322346528371176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,fp8,0,0.6385333140691122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,0,1,fp8,fp8,0,0.5837706724802653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,float16,0,0.5917919874191284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,fp8,0,0.5937013228734335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,float16,0,0.583893338839213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,fp8,0,0.5959466695785522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,0,1,fp8,fp8,0,0.5332213242848715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,float16,0,0.6036159992218018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,fp8,0,0.5942933162053426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,0,1,fp8,fp8,0,0.5427146752675375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,float16,0,0.33955200513203937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,0,1,fp8,fp8,0,0.529365340868632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,fp8,0,0.35425599416097003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,0,1,fp8,fp8,0,0.3179786602656047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,float16,0,0.3155253330866496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,fp8,0,0.32130666573842365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,0,1,fp8,fp8,0,0.2896639903386434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,float16,0,0.31938666105270386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,0,1,fp8,fp8,0,1.048634688059489
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,float16,0,0.32572799921035767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,fp8,0,0.3285866578420003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,0,1,fp8,fp8,0,0.296453336874644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,float16,0,0.19734932978947958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,fp8,0,0.20100800196329752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,0,1,fp8,fp8,0,0.18313600619633993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,float16,0,0.1768959959348043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,fp8,0,0.17915199200312296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,0,1,fp8,fp8,0,0.16215466459592184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,float16,0,0.17832533518473306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,fp8,0,0.1755626598993937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,0,1,fp8,fp8,0,0.16915200153986612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,0,1,fp8,fp8,0,0.29184534152348834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,float16,0,0.1925333340962728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,fp8,0,0.18726933002471924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,0,1,fp8,fp8,0,0.1740586757659912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,fp8,0,0.12230933705965678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,0,1,fp8,fp8,0,0.11611732840538025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,float16,0,0.11286399761835735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,fp8,0,0.11388267079989116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,0,1,fp8,fp8,0,0.10558933019638062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,float16,0,0.1144426663716634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,fp8,0,0.11514666676521301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,0,1,fp8,fp8,0,0.1053706705570221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,float16,0,0.115365336338679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,fp8,0,0.11640533804893494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,0,1,fp8,fp8,0,0.1092746655146281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,float16,0,0.07634666562080383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,fp8,0,0.07683733105659485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,0,1,fp8,fp8,0,0.07261866827805837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,float16,0,0.07444799939791362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,fp8,0,0.0745600014925003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,0,1,fp8,fp8,0,0.07085866729418437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,float16,0,0.07544533411661784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,fp8,0,0.07531733314196269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,0,1,fp8,fp8,0,0.07233599821726482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,float16,0,0.07564799984296162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,fp8,0,0.07654933134714763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,0,1,fp8,fp8,0,0.072543998559316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,fp8,0,0.3254026571909587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,float16,0,1.2193919817606609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,fp8,0,1.2207307020823162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,0,1,fp8,fp8,0,1.1105546951293945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,float16,0,1.2326506773630779
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,fp8,0,1.2434453169504802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,0,1,fp8,fp8,0,1.1257440249125164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,float16,0,1.243839979171753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,fp8,0,1.2654613653818767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,float16,0,0.11961600184440613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,0,1,fp8,fp8,0,1.1498773097991943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,fp8,0,0.7147200107574463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,float16,0,0.6340800126393636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,fp8,0,0.6309706767400106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,0,1,fp8,fp8,0,0.5731893380482992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,float16,0,0.6344693501790365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,fp8,0,0.6434666713078817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,0,1,fp8,fp8,0,0.5780160029729208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,float16,0,0.6524693171183268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,fp8,0,0.6500320037206014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,0,1,fp8,fp8,0,0.5915893316268921
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,float16,0,0.3681866725285848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,fp8,0,0.3784373203913371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,0,1,fp8,fp8,0,0.3415306806564331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,float16,0,0.3380959828694661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,fp8,0,0.33497599760691327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,float16,0,0.6919573148091634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,float16,0,0.33724268277486164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,fp8,0,0.3381386597951253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,0,1,fp8,fp8,0,0.30833067496617633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,float16,0,0.341973344484965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,fp8,0,0.3455466826756795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,0,1,fp8,fp8,0,0.30631999174753827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,0,1,fp8,fp8,0,0.6435679992039999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,float16,0,0.20788266261418661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,fp8,0,0.2095200022061666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,float16,0,0.18226132790247598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,fp8,0,0.18369066715240479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,0,1,fp8,fp8,0,0.1709173321723938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,float16,0,0.18486400445302328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,0,1,fp8,fp8,0,0.31436266501744586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,fp8,0,0.18498132626215616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,0,1,fp8,fp8,0,0.17287466923395792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,float16,0,0.19162132342656454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,fp8,0,0.19207467635472616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,0,1,fp8,fp8,0,0.17731734116872153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,fp8,0,0.1209440032641093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,0,1,fp8,fp8,0,0.11452266573905945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,float16,0,0.10845866799354553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,fp8,0,0.1071519951025645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,0,1,fp8,fp8,0,0.09731200337409973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,float16,0,0.10822400450706482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,0,1,fp8,fp8,0,0.0995199978351593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,float16,0,0.110944002866745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,fp8,0,0.11154133081436157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,0,1,fp8,fp8,0,0.19139200448989868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,0,1,fp8,fp8,0,0.10409067074457805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,float16,0,0.07603199779987335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,fp8,0,0.07843199868996938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,0,1,fp8,fp8,0,0.07269333302974701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,float16,0,0.07339199880758922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,fp8,0,0.07520000139872234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,0,1,fp8,fp8,0,0.06860266625881195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,float16,0,0.07543999950091045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,fp8,0,0.07669866581757863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,0,1,fp8,fp8,0,0.0687360018491745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,float16,0,0.07522133489449818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,fp8,0,0.07532266775767009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,0,1,fp8,fp8,0,0.06900266806284587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,float16,0,0.05629333357016245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,fp8,0,0.05641066531340281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,0,1,fp8,fp8,0,0.05420266588528951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,float16,0,0.054586668809254967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,fp8,0,0.05622933308283488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,0,1,fp8,fp8,0,0.052282666166623436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,float16,0,0.0562720000743866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,fp8,0,0.056101332108179726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,0,1,fp8,fp8,0,0.05400000015894572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,float16,0,0.056314667065938316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,fp8,0,0.05625600119431814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,0,1,fp8,fp8,0,0.05394133428732554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,fp8,0,0.10849600036938985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,float16,0,0.8725866476694742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,fp8,0,0.8740959962209066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,0,1,fp8,fp8,0,0.7840747038523356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,float16,0,0.11902933319409688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,float16,0,0.8847359816233317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,fp8,0,0.8821653525034586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,0,1,fp8,fp8,0,0.7948426405588785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,float16,0,0.8901546796162924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,fp8,0,0.8939092953999838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,0,1,fp8,fp8,0,0.8035253683725992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,fp8,0,0.49978665510813397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,float16,0,0.43906664848327637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,fp8,0,0.44099199771881104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,0,1,fp8,fp8,0,0.40037866433461505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,float16,0,0.4413546721140544
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,fp8,0,0.4455946683883667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,0,1,fp8,fp8,0,0.40407466888427734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,float16,0,0.4510879913965861
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,float16,0,0.4992426633834839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,fp8,0,0.45624534289042157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,float16,0,0.26658133665720624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,0,1,fp8,fp8,0,0.4563680092493693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,fp8,0,0.2672800024350484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,0,1,fp8,fp8,0,0.2442400058110555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,float16,0,0.2368053396542867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,fp8,0,0.23519466320673624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,0,1,fp8,fp8,0,0.21626132726669312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,float16,0,0.23898667097091675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,fp8,0,0.23878933986028036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,0,1,fp8,fp8,0,0.21597333749135336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,float16,0,0.24501333634058634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,fp8,0,0.24417599042256674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,0,1,fp8,fp8,0,0.22200000286102295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,float16,0,0.14722667137781778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,fp8,0,0.1474133332570394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,0,1,fp8,fp8,0,0.1379039982954661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,float16,0,0.1265120009581248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,fp8,0,0.12429866194725037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,0,1,fp8,fp8,0,0.11551466584205627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,float16,0,0.12964266538619995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,fp8,0,0.1302773356437683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,0,1,fp8,fp8,0,0.12172800302505493
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,float16,0,0.13452266653378805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,fp8,0,0.13402666648228964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,0,1,fp8,fp8,0,0.12796266873677573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,fp8,0,0.08694400389989217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,0,1,fp8,fp8,0,0.08242666721343994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,float16,0,0.07845333218574524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,fp8,0,0.07986666758855183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,0,1,fp8,fp8,0,0.07292800148328145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,float16,0,0.08014933268229167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,fp8,0,0.0809333324432373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,0,1,fp8,fp8,0,0.41283734639485675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,float16,0,0.07914666831493378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,float16,0,0.08559466401735942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,fp8,0,0.08097066481908162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,0,1,fp8,fp8,0,0.07479466497898102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,float16,0,0.05409066875775655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,fp8,0,0.05395199855168661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,0,1,fp8,fp8,0,0.052000001072883606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,float16,0,0.05219733218352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,fp8,0,0.052245333790779114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,0,1,fp8,fp8,0,0.04991999765237173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,float16,0,0.05235200126965841
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,fp8,0,0.05211733281612396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,0,1,fp8,fp8,0,0.0480320006608963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,float16,0,0.05226133267084757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,fp8,0,0.05208533505598704
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,0,1,fp8,fp8,0,0.049973333875338234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,float16,0,0.04587199787298838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,fp8,0,0.047584002216657005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,0,1,fp8,fp8,0,0.045882667104403176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,float16,0,0.046154667933781944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,fp8,0,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,0,1,fp8,fp8,0,0.04418666660785675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,float16,0,0.045909335215886436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,0,1,fp8,fp8,0,0.04377600053946177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,float16,0,0.04585599899291992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,fp8,0,0.04618666569391886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,0,1,fp8,fp8,0,0.04397333165009817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,float16,0,0.8737866878509521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,0,1,fp8,fp8,0,0.07373866438865662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,0,1,fp8,fp8,0,0.8664800326029459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,float16,0,0.889087994893392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,fp8,0,0.04726399978001913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,fp8,0,0.8905333677927653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,0,1,fp8,fp8,0,0.8869120279947916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,float16,0,0.9065173467000326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,fp8,0,0.9025920232137045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,float16,0,0.517194668451945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,0,1,fp8,fp8,0,0.8959306875864664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,fp8,0,0.5072000026702881
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,0,1,fp8,fp8,0,0.5077919960021973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,float16,0,0.4495146671930949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,fp8,0,0.45047465960184735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,0,1,fp8,fp8,0,0.4447786808013916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,float16,0,0.459333340326945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,fp8,0,0.4599039951960246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,fp8,0,0.8702507019042969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,0,1,fp8,fp8,0,0.4541120131810506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,float16,0,0.4710773229598999
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,fp8,0,0.4700586795806885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,float16,0,0.2749386628468831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,fp8,0,0.2700586716334025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,float16,0,0.23624533414840698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,fp8,0,0.2347093423207601
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,0,1,fp8,fp8,0,0.23041067520777384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,float16,0,0.24331200122833252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,fp8,0,0.24129599332809448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,0,1,fp8,fp8,0,0.24116800228754678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,float16,0,0.24726933240890503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,fp8,0,0.24756266673405966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,0,1,fp8,fp8,0,0.24119999011357626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,0,1,fp8,fp8,0,0.46080533663431805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,0,1,fp8,fp8,0,0.26743467648824054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,0,1,fp8,fp8,0,0.1481066644191742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,float16,0,0.1304746667544047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,fp8,0,0.12837866942087808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,0,1,fp8,fp8,0,0.12329600254694621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,float16,0,0.13115732868512472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,fp8,0,0.13049599528312683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,0,1,fp8,fp8,0,0.12974400321642557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,float16,0,0.1363146702448527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,fp8,0,0.1344533363978068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,0,1,fp8,fp8,0,0.13371200362841287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,float16,0,0.08701866865158081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,fp8,0,0.08539733290672302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,0,1,fp8,fp8,0,0.08698667089144389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,float16,0,0.07833600044250488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,fp8,0,0.14762666821479797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,0,1,fp8,fp8,0,0.07072533170382182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,float16,0,0.07860800127188365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,fp8,0,0.07766399780909221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,0,1,fp8,fp8,0,0.07158400118350983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,float16,0,0.07831466694672902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,fp8,0,0.07835733393828075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,0,1,fp8,fp8,0,0.07474666833877563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,float16,0,0.05222400029500326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,fp8,0,0.05506666501363119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,0,1,fp8,fp8,0,0.05176533261934916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,float16,0,0.052746668457984924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,fp8,0,0.051925331354141235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,0,1,fp8,fp8,0,0.05005866785844167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,float16,0,0.051882664362589516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,fp8,0,0.05356266597906748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,fp8,0,0.07700799902280171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,float16,0,0.05222400029500326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,fp8,0,0.05226133267084757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,0,1,fp8,fp8,0,0.04974933465321859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,float16,0,0.038106667498747505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,0,1,fp8,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,float16,0,0.037434667348861694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,0,1,fp8,fp8,0,0.03572800010442734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,float16,0,0.03782399992148081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,fp8,0,0.03809066613515218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,0,1,fp8,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,float16,0,0.03738133360942205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,fp8,0,0.03745600084463755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,0,1,fp8,fp8,0,0.0353973334034284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,float16,0,0.033600000043710075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,fp8,0,0.03601066768169403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,0,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,float16,0,0.03364799916744232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,fp8,0,0.03387733300526937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,0,1,fp8,fp8,0,0.03170666595300039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,float16,0,0.03363200028737386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,0,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,float16,0,0.03534399966398875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,0,1,fp8,fp8,0,0.033520000676314034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,float16,0,0.1502293348312378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,0,1,float16,fp8,0,0.7390453020731608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,0,1,fp8,fp8,0,0.7420960267384847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,0,1,float16,float16,0,0.7585279941558838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,0,1,float16,fp8,0,0.7564106782277426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,0,1,fp8,fp8,0,0.7631253401438395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,0,1,float16,float16,0,0.740336020787557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,0,1,fp8,fp8,0,0.04956800242265066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,0,1,float16,float16,0,0.7738613287607828
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,0,1,float16,fp8,0,0.7666292985280355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,0,1,float16,float16,0,0.44730667273203534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,0,1,float16,fp8,0,0.43724799156188965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,0,1,fp8,fp8,0,0.4472693204879761
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,0,1,float16,float16,0,0.38027199109395343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,0,1,float16,fp8,0,0.37941332658131915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,0,1,fp8,fp8,0,0.3821813265482585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,0,1,float16,float16,0,0.39054401715596515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,0,1,float16,fp8,0,0.38920001188913983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,0,1,fp8,fp8,0,0.39057600498199463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,0,1,float16,float16,0,0.4005013306935628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,0,1,float16,fp8,0,0.39633599917093915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,0,1,fp8,fp8,0,0.40778132279713947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,0,1,float16,fp8,0,0.2309760053952535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,0,1,fp8,fp8,0,0.785045305887858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,0,1,fp8,fp8,0,0.23456533749898276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,0,1,float16,float16,0,0.20217599471410116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,0,1,float16,fp8,0,0.19979733228683472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,0,1,float16,float16,0,0.2063466707865397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,0,1,float16,fp8,0,0.20570133129755655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,0,1,fp8,fp8,0,0.20812267065048218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,0,1,float16,float16,0,0.21299733718236288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,0,1,float16,fp8,0,0.20906666914621988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,0,1,fp8,fp8,0,0.21077332894007364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,0,1,float16,float16,0,0.13062399625778198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,0,1,float16,fp8,0,0.12784533699353537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,0,1,fp8,fp8,0,0.1279306709766388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,0,1,float16,float16,0,0.10983467102050781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,0,1,float16,fp8,0,0.10880000392595927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,0,1,fp8,fp8,0,0.10419733325640361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,0,1,float16,float16,0,0.11246933539708455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,0,1,float16,fp8,0,0.11169600486755371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,0,1,fp8,fp8,0,0.1125866671403249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,0,1,float16,float16,0,0.11461333433787028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,0,1,float16,fp8,0,0.11477866768836975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,0,1,fp8,fp8,0,0.11449600259462993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,0,1,float16,fp8,0,0.07343466579914093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,0,1,fp8,fp8,0,0.07631466786066692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,0,1,float16,float16,0,0.06693333387374878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,0,1,float16,float16,0,0.23839465777079263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,0,1,float16,fp8,0,0.06592000027497609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,0,1,fp8,fp8,0,0.05929600199063619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,0,1,float16,float16,0,0.06656533479690552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,0,1,float16,fp8,0,0.06485333542029063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,0,1,fp8,fp8,0,0.062362665931383766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,0,1,fp8,fp8,0,0.19772799809773764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,0,1,float16,fp8,0,0.06806399921576183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,0,1,fp8,fp8,0,0.06363200147946675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,0,1,float16,float16,0,0.04714133342107137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,0,1,float16,fp8,0,0.047413334250450134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,0,1,fp8,fp8,0,0.044165333112080894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,0,1,float16,float16,0,0.04538666705290476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,0,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,0,1,fp8,fp8,0,0.04005866746107737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,0,1,float16,float16,0,0.04372799893220266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,0,1,float16,fp8,0,0.04383466641108195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,0,1,fp8,fp8,0,0.04144000013669332
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,0,1,float16,float16,0,0.044351999958356224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,0,1,float16,fp8,0,0.04510400195916494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,0,1,fp8,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,0,1,float16,float16,0,0.0336053321758906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,0,1,float16,fp8,0,0.0335359995563825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,0,1,fp8,fp8,0,0.03183999905983607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,0,1,float16,float16,0,0.03140799949566523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,0,1,float16,float16,0,0.07505600154399872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,0,1,fp8,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,0,1,float16,float16,0,0.03125333289305369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,0,1,float16,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,0,1,float16,float16,0,0.03162666658560435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,0,1,float16,fp8,0,0.032111999889214836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,0,1,float16,float16,0,0.06647466619809468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,0,1,fp8,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,0,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,0,1,float16,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,0,1,fp8,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,0,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,0,1,float16,fp8,0,0.027776000400384266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,0,1,float16,float16,0,0.027327999472618103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,0,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,0,1,fp8,fp8,0,0.026543999711672466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,0,1,float16,float16,0,0.02882666637500127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,0,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,0,1,float16,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,0,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,0,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,0,1,fp8,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,0,1,float16,float16,0,0.025098666548728943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,0,1,float16,fp8,0,0.02566933383544286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,0,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,0,1,float16,float16,0,0.025557334224383037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,0,1,float16,fp8,0,0.025733334322770435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,0,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,0,1,float16,float16,0,0.34310932954152423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,0,1,float16,fp8,0,0.3407306671142578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,0,1,fp8,fp8,0,0.35387734572092694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,0,1,float16,float16,0,0.3510560194651286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,0,1,fp8,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,0,1,fp8,fp8,0,0.3599199851353963
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,0,1,float16,float16,0,0.3594079812367757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,0,1,float16,fp8,0,0.35553598403930664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,0,1,float16,float16,0,0.21609600385030112
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,0,1,float16,fp8,0,0.2115359902381897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,0,1,fp8,fp8,0,0.2185386617978414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,0,1,float16,float16,0,0.17940799395243326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,0,1,float16,fp8,0,0.17884800831476846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,0,1,fp8,fp8,0,0.18345600366592407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,0,1,float16,float16,0,0.18385066588719687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,0,1,float16,fp8,0,0.18509334325790405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,0,1,fp8,fp8,0,0.19029333194096884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,0,1,float16,float16,0,0.18869866927464804
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,0,1,float16,fp8,0,0.18736000855763754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,0,1,fp8,fp8,0,0.19410133361816406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,0,1,float16,fp8,0,0.11611200372378032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,0,1,fp8,fp8,0,0.12040000160535176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,0,1,float16,float16,0,0.10113599896430969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,0,1,float16,fp8,0,0.10105066498120625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,0,1,fp8,fp8,0,0.09659733374913533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,0,1,float16,float16,0,0.1032480001449585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,0,1,float16,fp8,0,0.1020853320757548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,0,1,fp8,fp8,0,0.10361599922180176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,0,1,float16,float16,0,0.10573866963386536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,0,1,float16,fp8,0,0.10532266894976298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,0,1,fp8,fp8,0,0.10641599694887798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,0,1,float16,float16,0,0.06705600023269653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,0,1,float16,fp8,0,0.06660800178845723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,0,1,fp8,fp8,0,0.3686240116755168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,0,1,fp8,fp8,0,0.0697386662165324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,0,1,float16,float16,0,0.11948800086975098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,0,1,float16,fp8,0,0.05930666625499725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,0,1,fp8,fp8,0,0.05385600030422211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,0,1,float16,float16,0,0.059343998630841575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,0,1,float16,fp8,0,0.059802666306495667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,0,1,fp8,fp8,0,0.05443733433882395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,0,1,float16,float16,0,0.060080001751581825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,0,1,float16,fp8,0,0.060517330964406334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,0,1,fp8,fp8,0,0.05849599838256836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,0,1,float16,float16,0,0.042912001411120095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,0,1,float16,fp8,0,0.04237333436806997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,0,1,fp8,fp8,0,0.03972800076007843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,0,1,float16,float16,0,0.040421334405740104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,0,1,float16,fp8,0,0.04005333284536997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,0,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,0,1,float16,float16,0,0.041237334410349526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,0,1,float16,fp8,0,0.04101333270470301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,0,1,fp8,fp8,0,0.037871999045213066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,0,1,float16,float16,0,0.04172799984614054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,0,1,float16,fp8,0,0.04186666508515676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,0,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,0,1,float16,float16,0,0.029696000119050343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,0,1,float16,fp8,0,0.029743999242782593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,0,1,fp8,fp8,0,0.027717334528764088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,0,1,float16,float16,0,0.027514666318893433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,0,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,0,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,0,1,float16,float16,0,0.02941333254178365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,0,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,0,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,0,1,float16,float16,0,0.029232000311215717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,0,1,float16,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,0,1,fp8,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,0,1,float16,float16,0,0.025621332228183746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,0,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,0,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,0,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,0,1,float16,float16,0,0.025381334125995636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,0,1,float16,fp8,0,0.025605333348115284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,0,1,float16,float16,0,0.05840533475081126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,0,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,0,1,float16,float16,0,0.023317334552605946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,0,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,0,1,float16,float16,0,0.02221333235502243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,0,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,0,1,fp8,fp8,0,0.02011200040578842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,0,1,float16,float16,0,0.023178666830062866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,0,1,float16,fp8,0,0.022085333863894146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,0,1,float16,float16,0,0.021418665846188862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,0,1,fp8,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,0,1,float16,float16,0,0.021669333179791767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,0,1,float16,fp8,0,0.02258666604757309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,0,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,0,1,float16,float16,0,0.02163200080394745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,0,1,float16,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,0,1,fp8,fp8,0,0.01961600035429001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,0,1,float16,float16,0,0.021594665944576263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,0,1,fp8,fp8,0,0.020848001043001812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,0,1,float16,fp8,0,0.34913067022959393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,0,1,float16,fp8,0,0.19177599747975668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,0,1,fp8,fp8,0,0.1960373322168986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,0,1,float16,float16,0,0.19813867410024008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,0,1,float16,fp8,0,0.19773866732915243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,0,1,fp8,fp8,0,0.20269866784413657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,0,1,float16,float16,0,0.20266133546829224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,0,1,float16,fp8,0,0.20080532630284628
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,0,1,fp8,fp8,0,0.20921599864959717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,0,1,float16,float16,0,0.19236799081166586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,0,1,float16,fp8,0,0.1206773320833842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,0,1,fp8,fp8,0,0.12586667140324911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,0,1,float16,float16,0,0.1056160032749176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,0,1,float16,fp8,0,0.1053013304869334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,0,1,fp8,fp8,0,0.10246400038401286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,0,1,float16,float16,0,0.10803199807802837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,0,1,float16,fp8,0,0.10857599973678589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,0,1,fp8,fp8,0,0.11004799604415894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,0,1,float16,float16,0,0.11155733466148376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,0,1,float16,fp8,0,0.10977066556612651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,0,1,fp8,fp8,0,0.11354666948318481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,0,1,float16,float16,0,0.12237866719563802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,0,1,float16,fp8,0,0.06977599859237671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,0,1,fp8,fp8,0,0.07314666608969371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,0,1,float16,float16,0,0.062090665102005005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,0,1,float16,fp8,0,0.062458669145902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,0,1,fp8,fp8,0,0.056688000758488975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,0,1,float16,float16,0,0.060640002290407814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,0,1,float16,fp8,0,0.06157866617043813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,0,1,float16,float16,0,0.0621013343334198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,0,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,0,1,fp8,fp8,0,0.06038933495680491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,0,1,float16,float16,0,0.03990933299064636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,0,1,float16,fp8,0,0.04195733368396759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,0,1,fp8,fp8,0,0.0402399996916453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,0,1,float16,float16,0,0.03966933240493139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,0,1,float16,fp8,0,0.03956266740957896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,0,1,fp8,fp8,0,0.03570133447647095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,0,1,float16,float16,0,0.038293334345022835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,0,1,float16,fp8,0,0.03937600056330363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,0,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,0,1,float16,float16,0,0.0699786643187205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,0,1,float16,float16,0,0.03950933367013931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,0,1,float16,fp8,0,0.0401706670721372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,0,1,fp8,fp8,0,0.03603200117746989
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,0,1,float16,float16,0,0.029450667401154835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,0,1,float16,fp8,0,0.029829333225886028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,0,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,0,1,float16,float16,0,0.029711998999118805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,0,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,0,1,float16,float16,0,0.028959999481836956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,0,1,float16,fp8,0,0.029781334102153778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,0,1,fp8,fp8,0,0.02884799987077713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,0,1,float16,float16,0,0.029301332930723827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,0,1,float16,fp8,0,0.029178666571776073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,0,1,fp8,fp8,0,0.028069332242012024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,0,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,0,1,fp8,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,0,1,fp8,fp8,0,0.022890667120615642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,0,1,float16,fp8,0,0.0632479985555013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,0,1,float16,fp8,0,0.022042666872342426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,0,1,fp8,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,0,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,0,1,fp8,fp8,0,0.0581226646900177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,0,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,0,1,fp8,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,0,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,0,1,float16,float16,0,0.017717332889636356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,0,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,0,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,0,1,float16,float16,0,0.019567999988794327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,0,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,0,1,float16,float16,0,0.018863999595244724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,0,1,fp8,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,0,1,float16,fp8,0,0.01785600061217944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,0,1,float16,fp8,0,0.018746666610240936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,0,1,float16,float16,0,0.1300159990787506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,0,1,float16,fp8,0,0.12827199697494507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,0,1,float16,fp8,0,0.018138666947682697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,0,1,fp8,fp8,0,0.1262933313846588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,0,1,float16,float16,0,0.13080533345540366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,0,1,float16,fp8,0,0.1304800013701121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,0,1,fp8,fp8,0,0.13648000359535217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,0,1,float16,float16,0,0.1322986682256063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,0,1,float16,fp8,0,0.1322879989941915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,0,1,fp8,fp8,0,0.13658133149147034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,0,1,float16,float16,0,0.08092266817887624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,0,1,float16,fp8,0,0.08040533463160197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,0,1,fp8,fp8,0,0.08468799789746602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,0,1,float16,float16,0,0.07248533268769582
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,0,1,float16,fp8,0,0.07254933317502339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,0,1,fp8,fp8,0,0.06833066542943318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,0,1,float16,float16,0,0.07276799778143565
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,0,1,float16,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,0,1,float16,fp8,0,0.07266133526961009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,0,1,fp8,fp8,0,0.07081066568692525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,0,1,float16,float16,0,0.07253333429495494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,0,1,fp8,fp8,0,0.07332799832026164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,0,1,float16,float16,0,0.045968001087506614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,0,1,float16,fp8,0,0.0469706654548645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,0,1,fp8,fp8,0,0.046015997727712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,0,1,float16,float16,0,0.04532266656557719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,0,1,float16,fp8,0,0.0439573327700297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,0,1,fp8,fp8,0,0.04399999976158142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,0,1,float16,float16,0,0.04370133578777313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,0,1,float16,fp8,0,0.04596266647179922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,0,1,fp8,fp8,0,0.04377066592375437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,0,1,float16,float16,0,0.046240001916885376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,0,1,float16,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,0,1,fp8,fp8,0,0.045456002155939736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,0,1,float16,fp8,0,0.031727999448776245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,0,1,float16,fp8,0,0.07283199826876323
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,0,1,fp8,fp8,0,0.031658666829268135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,0,1,float16,float16,0,0.03249600032965342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,0,1,float16,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,0,1,fp8,fp8,0,0.029733332494894665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,0,1,float16,float16,0,0.03146133323510488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,0,1,float16,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,0,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,0,1,float16,float16,0,0.030591999491055805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,0,1,float16,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,0,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,0,1,float16,float16,0,0.023386667172114056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,0,1,float16,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,0,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,0,1,float16,float16,0,0.02510933329661687
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,0,1,float16,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,0,1,fp8,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,0,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,0,1,float16,float16,0,0.03161066770553589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,0,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,0,1,float16,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,0,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,0,1,float16,float16,0,0.01758933315674464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,0,1,float16,fp8,0,0.017717332889636356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,0,1,fp8,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,0,1,float16,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,0,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,0,1,float16,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,0,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,0,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,0,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,0,1,float16,float16,0,0.09717866778373718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,0,1,float16,fp8,0,0.09718933701515198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,0,1,fp8,fp8,0,0.09311999877293904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,0,1,float16,float16,0,0.09698133667310078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,0,1,float16,float16,0,0.01676799977819125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,0,1,fp8,fp8,0,0.09550933043162028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,0,1,float16,float16,0,0.09706667065620422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,0,1,float16,fp8,0,0.09816533327102661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,0,1,fp8,fp8,0,0.09718933701515198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,0,1,float16,float16,0,0.058864002426465355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,0,1,float16,fp8,0,0.05845866600672404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,0,1,fp8,fp8,0,0.06052800019582113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,0,1,float16,float16,0,0.056976000467936196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,0,1,float16,fp8,0,0.056554665168126426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,0,1,fp8,fp8,0,0.05593066910902659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,0,1,float16,float16,0,0.05671466886997223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,0,1,float16,fp8,0,0.056277334690093994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,0,1,fp8,fp8,0,0.05568000177542368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,0,1,float16,float16,0,0.05796800057093302
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,0,1,float16,fp8,0,0.056314667065938316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,0,1,fp8,fp8,0,0.05569066603978475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,0,1,float16,float16,0,0.03974399964014689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,0,1,float16,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,0,1,fp8,fp8,0,0.03938133269548416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,0,1,float16,float16,0,0.037776000797748566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,0,1,float16,fp8,0,0.037861332297325134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,0,1,fp8,fp8,0,0.03757333258787791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,0,1,float16,float16,0,0.03738133360942205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,0,1,float16,fp8,0,0.037130666275819145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,0,1,fp8,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,0,1,float16,float16,0,0.03706666578849157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,0,1,float16,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,0,1,fp8,fp8,0,0.03594133257865906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,0,1,float16,float16,0,0.027471999327341717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,0,1,fp8,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,0,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,0,1,float16,float16,0,0.025557334224383037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,0,1,float16,fp8,0,0.025631998976071674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,0,1,fp8,fp8,0,0.02569599946339925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,0,1,float16,float16,0,0.027600000301996868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,0,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,0,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,0,1,float16,float16,0,0.01979200045267741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,0,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,0,1,float16,float16,0,0.017770666629076004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,0,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,0,1,float16,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,0,1,float16,float16,0,0.01623999948302905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,0,1,float16,fp8,0,0.09723200400670369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,0,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,0,1,float16,float16,0,0.08294933537642162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,0,1,float16,fp8,0,0.08239999910195668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,0,1,fp8,fp8,0,0.08088000118732452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,0,1,float16,float16,0,0.08260799944400787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,0,1,float16,fp8,0,0.08227199812730153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,0,1,fp8,fp8,0,0.08115733166535695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,0,1,float16,float16,0,0.08461866776148479
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,0,1,float16,fp8,0,0.08281066517035167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,0,1,fp8,fp8,0,0.08297599852085114
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,0,1,float16,fp8,0,0.050010666251182556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,0,1,fp8,fp8,0,0.05197866757710775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,0,1,float16,float16,0,0.049786667029062905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,0,1,float16,fp8,0,0.05018133421738943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,0,1,fp8,fp8,0,0.05008000135421753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,0,1,float16,float16,0,0.050479998191197716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,0,1,float16,fp8,0,0.04977599779764811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,0,1,fp8,fp8,0,0.04962133367856344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,0,1,float16,float16,0,0.050160000721613564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,0,1,float16,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,0,1,fp8,fp8,0,0.05014933149019877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,0,1,float16,float16,0,0.03457599878311157
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,0,1,float16,fp8,0,0.03380800038576126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,0,1,fp8,fp8,0,0.034234667817751564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,0,1,float16,fp8,0,0.033557333052158356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,0,1,fp8,fp8,0,0.03387200087308884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,0,1,float16,float16,0,0.03367999941110611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,0,1,float16,fp8,0,0.03421333432197571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,0,1,fp8,fp8,0,0.03388266762097677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,0,1,float16,fp8,0,0.0498933345079422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,0,1,float16,float16,0,0.03454400102297465
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,0,1,float16,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,0,1,fp8,fp8,0,0.03472533325354258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,0,1,float16,float16,0,0.033285332222779594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,0,1,float16,float16,0,0.022783999641736347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,0,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,0,1,float16,float16,0,0.022517333428064983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,0,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,0,1,float16,float16,0,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,0,1,float16,float16,0,0.02349333216746648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,0,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,0,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,0,1,float16,float16,0,0.018810667097568512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,0,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,0,1,float16,fp8,0,0.02000533292690913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,0,1,fp8,fp8,0,0.01599466676513354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,0,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,0,1,float16,float16,0,0.05198400219281515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,0,1,fp8,fp8,0,0.016271999726692837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,0,1,fp8,fp8,0,0.01628799984852473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,0,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,0,1,fp8,fp8,0,0.01632533346613248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,0,1,fp8,fp8,0,0.01655999943614006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,128,0,1,float16,float16,0,0.07255466779073079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,128,0,1,float16,fp8,0,0.0707893321911494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,1,128,0,1,fp8,fp8,0,0.06860266625881195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,128,0,1,float16,float16,0,0.07039466500282288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,0,1,float16,fp8,0,0.01598400001724561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,128,0,1,fp8,fp8,0,0.06879466772079468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,128,0,1,float16,float16,0,0.07049599786599477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,128,0,1,float16,fp8,0,0.07250666618347168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,4,128,0,1,fp8,fp8,0,0.07019733389218648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,0,0.04424533247947693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,0,0.04397333165009817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,12,128,0,1,fp8,fp8,0,0.04192533095677694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,128,0,1,float16,float16,0,0.042319998145103455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,128,0,1,float16,fp8,0,0.04387733340263367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,1,128,0,1,fp8,fp8,0,0.04192533095677694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,128,0,1,float16,float16,0,0.04375466704368591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,128,0,1,float16,fp8,0,0.04377600053946177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,2,128,0,1,fp8,fp8,0,0.04520000020662943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,128,0,1,float16,float16,0,0.04350399971008301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,128,0,1,float16,fp8,0,0.04390933116277059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,12,4,128,0,1,fp8,fp8,0,0.04382933179537455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,0,0.029818666477998097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,128,0,1,float16,float16,0,0.02934933453798294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,128,0,1,float16,fp8,0,0.031210665901501972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,1,128,0,1,fp8,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,128,0,1,float16,float16,0,0.029391999046007793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,128,0,1,float16,fp8,0,0.029866665601730347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,2,128,0,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,128,0,1,float16,float16,0,0.03032533327738444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,128,0,1,float16,fp8,0,0.029557332396507263
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,4,128,0,1,fp8,fp8,0,0.02908266584078471
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,0,0.02317333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,12,128,0,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,128,0,1,float16,float16,0,0.022757334013779957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,128,0,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,1,128,0,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,128,0,1,float16,float16,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,128,0,1,float16,fp8,0,0.024192000428835552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,2,128,0,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,128,0,1,float16,float16,0,0.025333332518736523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,128,0,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,12,4,128,0,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,12,2,128,0,1,float16,fp8,0,0.07071466743946075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,128,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,128,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,1,128,0,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,128,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,128,0,1,float16,fp8,0,0.01977066695690155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,2,128,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,128,0,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,128,0,1,float16,fp8,0,0.021546666820844013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,12,128,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,12,128,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,128,0,1,float16,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,2,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,128,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,4,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,12,12,128,0,1,fp8,fp8,0,0.02943466603755951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,12,128,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,12,1,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,128,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,2,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,128,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,128,0,1,float16,fp8,0,0.01643199970324834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,4,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,12,128,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,128,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,2,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,128,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,12,4,128,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,12,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,128,0,1,float16,float16,0,0.015925332903862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,128,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,1,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,128,0,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,2,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,128,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,12,4,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,12,1,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,12,4,128,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,float16,0,3.326197306315104
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,0,1,fp8,fp8,0,2.8745225270589194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,float16,0,3.6827732721964517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,fp8,0,3.239898681640625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,fp8,0,3.29857603708903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,0,1,fp8,fp8,0,2.892218589782715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,float16,0,1.8082027435302734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,float16,0,3.82150936126709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,0,1,fp8,fp8,0,2.912837346394857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,fp8,0,3.5131092071533203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,fp8,0,1.8528639475504558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,0,1,fp8,fp8,0,1.557626724243164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,float16,0,1.692405382792155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,fp8,0,1.6842506726582844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,0,1,fp8,fp8,0,1.5154719352722168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,float16,0,1.784287929534912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,fp8,0,1.7306079864501953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,float16,0,1.725861390431722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,fp8,0,1.8414506912231445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,float16,0,0.9367573261260986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,0,1,fp8,fp8,0,1.6783466339111328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,0,1,fp8,fp8,0,0.862015962600708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,float16,0,0.9246453444163004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,fp8,0,0.9178933302561442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,0,1,fp8,fp8,0,1.5297706921895344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,0,1,fp8,fp8,0,0.8410933017730713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,float16,0,0.9223093191782633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,fp8,0,0.9419840176900228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,0,1,fp8,fp8,0,0.8429706891377767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,float16,0,0.926042636235555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,fp8,0,0.9359573523203532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,0,1,fp8,fp8,0,0.8491040070851644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,float16,0,0.5542346636454264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,fp8,0,0.5606880187988281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,0,1,fp8,fp8,0,0.5138186613718668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,float16,0,0.5421493450800577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,fp8,0,0.9861120382944742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,0,1,fp8,fp8,0,0.4982453187306722
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,float16,0,0.5610400040944418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,fp8,0,0.5594240029652914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,0,1,fp8,fp8,0,0.5008213520050049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,float16,0,0.541045347849528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,fp8,0,0.5605013370513916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,fp8,0,0.5462933381398519
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,0,1,fp8,fp8,0,0.5058613220850626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,float16,0,2.05183998743693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,0,1,fp8,fp8,0,1.7365013758341472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,float16,0,2.002314726511637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,fp8,0,1.9718613624572754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,fp8,0,1.9218133290608723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,0,1,fp8,fp8,0,1.7447306315104167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,float16,0,2.0503360430399575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,float16,0,1.0508320331573486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,0,1,fp8,fp8,0,1.7595465977986653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,fp8,0,1.1812427043914795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,0,1,fp8,fp8,0,0.964949369430542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,float16,0,1.030666669209798
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,fp8,0,1.050719976425171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,0,1,fp8,fp8,0,0.927237351735433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,fp8,0,2.0530293782552085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,float16,0,1.0542240142822266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,fp8,0,1.0893386999766033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,float16,0,1.0410719712575276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,fp8,0,1.0612853368123372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,0,1,fp8,fp8,0,1.0710773468017578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,fp8,0,0.6054293314615885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,0,1,fp8,fp8,0,0.5468533436457316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,float16,0,0.5772693157196045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,fp8,0,0.5892106691996256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,0,1,fp8,fp8,0,0.5277173519134521
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,float16,0,0.5967466831207275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,fp8,0,0.5818346738815308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,0,1,fp8,fp8,0,0.5310773452123007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,float16,0,0.5868800083796183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,fp8,0,0.5968533356984457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,0,1,fp8,fp8,0,0.5345280170440674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,0,1,fp8,fp8,0,0.9325973192850748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,float16,0,0.3707840045293172
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,fp8,0,0.3665226697921753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,0,1,fp8,fp8,0,0.3352213303248088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,float16,0,0.35981865723927814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,float16,0,0.5842986504236857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,0,1,fp8,fp8,0,0.324015994866689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,float16,0,0.3587199846903483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,fp8,0,0.3582079807917277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,0,1,fp8,fp8,0,0.32491199175516766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,float16,0,0.3603466749191284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,fp8,0,0.3594026565551758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,0,1,fp8,fp8,0,0.330458660920461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,float16,0,1.41921599706014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,fp8,0,1.43613862991333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,float16,0,1.4579572677612305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,fp8,0,1.40994660059611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,0,1,fp8,fp8,0,1.2786933581034343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,0,1,fp8,fp8,0,1.2673333485921223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,fp8,0,1.4978399276733398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,float16,0,0.8610933621724447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,fp8,0,0.35596267382303876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,fp8,0,0.813594659169515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,0,1,fp8,fp8,0,0.7161493301391602
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,0,1,fp8,fp8,0,1.292101303736369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,fp8,0,0.7598186333974203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,0,1,fp8,fp8,0,0.689130703608195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,float16,0,1.4064052899678547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,fp8,0,0.7615679899851481
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,0,1,fp8,fp8,0,0.6914186477661133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,float16,0,0.7963573137919108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,fp8,0,0.7801813284556071
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,0,1,fp8,fp8,0,0.697920004526774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,float16,0,0.45394666989644367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,float16,0,0.7714293003082275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,0,1,fp8,fp8,0,0.4150720040003459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,float16,0,0.4371573527654012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,fp8,0,0.44205331802368164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,float16,0,0.7597173055013021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,float16,0,0.43534934520721436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,fp8,0,0.4424000183741252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,0,1,fp8,fp8,0,0.4007093509038289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,fp8,0,0.4453973372777303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,0,1,fp8,fp8,0,0.39645334084828693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,0,1,fp8,fp8,0,0.40615999698638916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,float16,0,0.28549333413441974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,fp8,0,0.28358399868011475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,0,1,fp8,fp8,0,0.262992004553477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,float16,0,0.27587733666102093
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,float16,0,0.45021335283915204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,0,1,fp8,fp8,0,0.25416000684102374
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,float16,0,0.2823093334833781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,fp8,0,0.2755519946416219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,0,1,fp8,fp8,0,0.25570666790008545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,float16,0,0.2760053277015686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,fp8,0,0.28539733091990155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,0,1,fp8,fp8,0,0.2565760016441345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,fp8,0,0.2802506685256958
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,float16,0,1.9427413940429688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,fp8,0,1.8613120714823406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,0,1,fp8,fp8,0,1.6678026517232258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,float16,0,2.0361812909444175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,fp8,0,0.4516479969024658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,fp8,0,2.035055955251058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,0,1,fp8,fp8,0,1.684783935546875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,float16,0,1.8674933115641277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,fp8,0,1.9903519948323567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,float16,0,1.0215306282043457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,0,1,fp8,fp8,0,1.7018826802571614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,fp8,0,1.0344746907552083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,0,1,fp8,fp8,0,0.9302879969278971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,float16,0,0.9715627034505209
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,fp8,0,0.99507737159729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,0,1,fp8,fp8,0,0.8761173089345297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,fp8,0,0.9987626870473226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,0,1,fp8,fp8,0,0.8826560179392496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,float16,0,0.9615466594696045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,fp8,0,0.9905707041422526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,0,1,fp8,fp8,0,0.8941280047098795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,fp8,0,0.5881653229395548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,0,1,fp8,fp8,0,0.506218671798706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,float16,0,0.975711981455485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,float16,0,0.5761440197626749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,fp8,0,0.529584010442098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,0,1,fp8,fp8,0,0.4822293519973755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,float16,0,0.5455093383789062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,fp8,0,0.5314453442891439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,0,1,fp8,fp8,0,0.4870186646779378
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,float16,0,0.5489546855290731
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,float16,0,0.5452266534169515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,fp8,0,0.5453120072682699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,0,1,fp8,fp8,0,0.49246398607889813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,float16,0,0.3333546717961629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,fp8,0,0.33267199993133545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,float16,0,0.3183786670366923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,fp8,0,0.31613866488138836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,0,1,fp8,fp8,0,0.2836959958076477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,float16,0,0.31139200925827026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,fp8,0,0.3193440039952596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,0,1,fp8,fp8,0,0.2882560094197591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,float16,0,0.3194933334986369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,fp8,0,0.32334933678309125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,0,1,fp8,fp8,0,0.2938986619313558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,float16,0,0.21085333824157715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,fp8,0,0.21368533372879028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,0,1,fp8,fp8,0,0.19406932592391968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,float16,0,0.20610666275024414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,fp8,0,0.20889600118001303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,0,1,fp8,fp8,0,0.1918026606241862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,float16,0,0.20707199970881143
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,fp8,0,0.2100480000178019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,0,1,fp8,fp8,0,0.18939199050267538
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,float16,0,0.20669867595036825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,fp8,0,0.20965333779652914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,0,1,fp8,fp8,0,0.1897653341293335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,float16,0,1.1566027005513508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,fp8,0,1.1656959851582844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,0,1,fp8,fp8,0,0.2998986641565959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,float16,0,1.1667040189107258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,fp8,0,1.1630346775054932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,0,1,fp8,fp8,0,1.0476693312327068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,float16,0,1.1607786814371746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,0,1,fp8,fp8,0,1.0365280310312908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,float16,0,0.6405920187632242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,fp8,0,0.6498719851175944
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,0,1,fp8,fp8,0,0.5932533343633016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,float16,0,0.6115839878718058
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,fp8,0,0.6093226671218872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,0,1,fp8,fp8,0,0.5545119841893514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,float16,0,0.613050659497579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,fp8,0,0.6090186834335327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,0,1,fp8,fp8,0,0.5604693492253622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,float16,0,0.6151839892069498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,fp8,0,0.6240853468577067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,0,1,fp8,fp8,0,0.5684159994125366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,float16,0,0.35733334223429364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,fp8,0,1.1763306458791096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,fp8,0,0.3620693286259969
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,0,1,fp8,fp8,0,0.3307093381881714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,float16,0,0.34625065326690674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,fp8,0,0.33743464946746826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,0,1,fp8,fp8,0,0.3124106725056966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,float16,0,0.3500639994939168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,fp8,0,0.3455679814020793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,0,1,fp8,fp8,0,0.3157386581103007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,float16,0,0.3572746515274048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,fp8,0,0.349834680557251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,0,1,fp8,fp8,0,0.31989866495132446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,float16,0,0.21890133619308472
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,fp8,0,0.2166879971822103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,0,1,fp8,fp8,0,0.20132267475128174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,float16,0,0.20814400911331177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,fp8,0,0.2042293349901835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,0,1,fp8,fp8,0,0.18611733118693033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,float16,0,0.2092693249384562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,fp8,0,0.2059040069580078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,0,1,fp8,fp8,0,0.187391996383667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,float16,0,0.21180800596872965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,fp8,0,0.2055520017941793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,0,1,fp8,fp8,0,0.19553599754969278
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,float16,0,0.1406719982624054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,fp8,0,0.13944000005722046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,0,1,fp8,fp8,0,0.1320480008920034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,float16,0,0.13672533631324768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,fp8,0,0.13751999537150064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,0,1,fp8,fp8,0,0.12774399916330972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,float16,0,0.13911466797192892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,fp8,0,0.1384266714255015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,0,1,fp8,fp8,0,0.1297920048236847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,float16,0,0.13724266489346823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,fp8,0,0.13871999581654867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,0,1,fp8,fp8,0,0.12995733817418417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,0,1,fp8,fp8,0,1.0641547044118245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,float16,0,1.1839786370595295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,fp8,0,1.162021319071452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,0,1,fp8,fp8,0,1.0546186765034993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,float16,0,1.1907413005828857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,fp8,0,1.1625760396321614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,0,1,fp8,fp8,0,1.0721653302510579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,float16,0,1.2070986429850261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,fp8,0,1.1834666728973389
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,0,1,fp8,fp8,0,1.0913546880086262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,float16,0,0.6895840167999268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,fp8,0,0.6616853475570679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,fp8,0,0.6176373163859049
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,float16,0,0.6090879837671915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,fp8,0,0.6203680038452148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,0,1,fp8,fp8,0,0.5992853244145712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,float16,0,0.6033386786778768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,0,1,fp8,fp8,0,0.5577546755472819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,float16,0,0.6202720006306967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,fp8,0,0.6397546529769897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,float16,0,0.35494399070739746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,fp8,0,0.35283199946085614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,0,1,fp8,fp8,0,0.3247680068016052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,float16,0,0.3301279942194621
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,fp8,0,0.32629332939783734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,0,1,fp8,fp8,0,0.30208534002304077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,float16,0,0.32977600892384845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,fp8,0,0.33695467313130695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,0,1,fp8,fp8,0,0.30613867441813153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,0,1,fp8,fp8,0,0.5515946547190348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,0,1,fp8,fp8,0,0.5719786485036215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,fp8,0,0.34062933921813965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,0,1,fp8,fp8,0,0.31196266412734985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,fp8,0,0.2074133356412252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,0,1,fp8,fp8,0,0.1896373430887858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,float16,0,0.19057599703470865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,fp8,0,0.1904319922129313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,0,1,fp8,fp8,0,0.17536532878875732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,float16,0,0.19073599576950073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,fp8,0,0.19034133354822794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,0,1,fp8,fp8,0,0.17720532417297363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,float16,0,0.19698133071263632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,fp8,0,0.20021865765253702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,float16,0,0.34036266803741455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,0,1,fp8,fp8,0,0.18318400780359903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,float16,0,0.12661332885424295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,fp8,0,0.12763733665148416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,0,1,fp8,fp8,0,0.12132267157236735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,float16,0,0.12092266480127971
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,fp8,0,0.12198399504025777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,0,1,fp8,fp8,0,0.11621333161989848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,float16,0,0.1218986709912618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,fp8,0,0.12237866719563802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,0,1,fp8,fp8,0,0.11353600025177002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,float16,0,0.12377599875132243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,fp8,0,0.1246560017267863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,0,1,fp8,fp8,0,0.11548800269762675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,fp8,0,0.08479467034339905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,0,1,fp8,fp8,0,0.08076799909273784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,float16,0,0.0825493335723877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,fp8,0,0.08270933230717976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,0,1,fp8,fp8,0,0.07888533174991608
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,float16,0,0.0811466674009959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,fp8,0,0.08239999910195668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,0,1,fp8,fp8,0,0.0786186655362447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,float16,0,0.0822026679913203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,fp8,0,0.08288000027338664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,0,1,fp8,fp8,0,0.08087466657161713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,float16,0,0.7494613329569498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,float16,0,0.08320533235867818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,0,1,fp8,fp8,0,0.6851627031962076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,float16,0,0.7646079858144125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,fp8,0,0.7600800196329752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,0,1,fp8,fp8,0,0.6994773546854655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,float16,0,0.7717706362406412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,fp8,0,0.7808106740315756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,float16,0,0.43747735023498535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,fp8,0,0.43851733207702637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,float16,0,0.20922134319941202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,0,1,fp8,fp8,0,0.39816534519195557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,float16,0,0.40979735056559247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,fp8,0,0.3972906668980916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,0,1,fp8,fp8,0,0.36238932609558105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,float16,0,0.3999413251876831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,fp8,0,0.4153546492258708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,0,1,fp8,fp8,0,0.37002134323120117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,float16,0,0.4166666666666667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,fp8,0,0.4124053319295247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,0,1,fp8,fp8,0,0.3777066469192505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,float16,0,0.2441706657409668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,fp8,0,0.24862400690714517
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,0,1,fp8,fp8,0,0.22368532419204712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,0,1,fp8,fp8,0,0.7163893381754557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,float16,0,0.22894400358200073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,fp8,0,0.21988266706466675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,0,1,fp8,fp8,0,0.2032853364944458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,float16,0,0.22419732809066772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,fp8,0,0.2288586695988973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,0,1,fp8,fp8,0,0.20697067181269327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,float16,0,0.23188267151514688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,fp8,0,0.23222400744756064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,0,1,fp8,fp8,0,0.21180800596872965
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,fp8,0,0.14457066853841147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,0,1,fp8,fp8,0,0.1339413324991862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,float16,0,0.1297813355922699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,fp8,0,0.13013866543769836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,0,1,fp8,fp8,0,0.11707199613253276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,float16,0,0.12897599736849466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,fp8,0,0.12896533807118735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,0,1,fp8,fp8,0,0.11940800150235494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,float16,0,0.1306933363278707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,fp8,0,0.13499200344085693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,float16,0,0.1400213340918223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,0,1,fp8,fp8,0,0.12562132875124613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,fp8,0,0.08864532907803853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,0,1,fp8,fp8,0,0.08309333523114522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,float16,0,0.0848586658636729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,fp8,0,0.08461333314577739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,0,1,fp8,fp8,0,0.07869333525498708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,float16,0,0.08498133222262065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,fp8,0,0.08685333530108134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,0,1,fp8,fp8,0,0.07926933467388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,float16,0,0.08678932984670003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,fp8,0,0.08695466319719951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,0,1,fp8,fp8,0,0.0805920014778773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,float16,0,0.06648533542950948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,fp8,0,0.06694933275381725
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,fp8,0,0.7611680030822754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,0,1,fp8,fp8,0,0.06507200002670288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,float16,0,0.08717866738637288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,float16,0,0.06437866886456807
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,0,1,fp8,fp8,0,0.06447466711203258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,fp8,0,0.06436799963315327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,0,1,fp8,fp8,0,0.0621013343334198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,float16,0,0.06434133152167003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,fp8,0,0.06608533362547557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,0,1,fp8,fp8,0,0.06253866851329803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,float16,0,0.8386080265045166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,fp8,0,0.06458666423956554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,float16,0,0.06446399788061778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,float16,0,0.8673973083496094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,fp8,0,0.8740320205688477
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,0,1,fp8,fp8,0,0.7925013701121012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,float16,0,0.875386635462443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,0,1,fp8,fp8,0,0.7635520299275717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,fp8,0,0.8634026845296224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,0,1,fp8,fp8,0,0.7943413257598877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,float16,0,0.4758880138397217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,fp8,0,0.48341866334279376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,float16,0,0.42929601669311523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,fp8,0,0.4302826722462972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,fp8,0,0.8420746326446533
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,float16,0,0.4366453488667806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,fp8,0,0.43986666202545166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,0,1,fp8,fp8,0,0.40243200461069745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,float16,0,0.44811733563741046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,fp8,0,0.45429333051045734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,0,1,fp8,fp8,0,0.4389813343683879
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,0,1,fp8,fp8,0,0.4116106828053792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,fp8,0,0.26499199867248535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,0,1,fp8,fp8,0,0.23757332563400269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,float16,0,0.23638933897018433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,fp8,0,0.23283199469248453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,0,1,fp8,fp8,0,0.21377599239349365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,float16,0,0.2365600069363912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,fp8,0,0.24061334133148193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,0,1,fp8,fp8,0,0.21782932678858438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,float16,0,0.24637866020202637
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,fp8,0,0.24458134174346924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,0,1,fp8,fp8,0,0.23054399092992148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,float16,0,0.2570986747741699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,fp8,0,0.14797332882881165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,0,1,fp8,fp8,0,0.1368053356806437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,float16,0,0.12744533022244772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,fp8,0,0.1267733375231425
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,0,1,fp8,fp8,0,0.11781332890192668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,float16,0,0.12878933548927307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,fp8,0,0.13081600268681845
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,0,1,fp8,fp8,0,0.12426666418711345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,float16,0,0.13668266932169595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,fp8,0,0.1360213359196981
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,0,1,fp8,fp8,0,0.12999999523162842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,float16,0,0.08285866677761078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,fp8,0,0.08715732892354329
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,0,1,fp8,fp8,0,0.08469333251317342
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,float16,0,0.08101866642634074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,fp8,0,0.08101333181063335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,0,1,fp8,fp8,0,0.07644799848397572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,float16,0,0.08065066734949748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,0,1,fp8,fp8,0,0.39183465639750165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,0,1,fp8,fp8,0,0.07477333148320515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,float16,0,0.08076799909273784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,fp8,0,0.08292266726493835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,0,1,fp8,fp8,0,0.07838933169841766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,float16,0,0.05539733171463013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,fp8,0,0.05625066657861074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,0,1,fp8,fp8,0,0.05392533540725708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,float16,0,0.05421333511670431
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,fp8,0,0.054197331269582115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,0,1,fp8,fp8,0,0.05075199902057648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,float16,0,0.05429866909980774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,fp8,0,0.054154664278030396
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,0,1,fp8,fp8,0,0.052005335688591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,float16,0,0.14408533771832785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,float16,0,0.05462933580080668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,fp8,0,0.054325332244237266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,0,1,fp8,fp8,0,0.05351466437180837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,float16,0,0.047775998711586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,fp8,0,0.04977599779764811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,fp8,0,0.08134933312733968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,fp8,0,0.048112000028292336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,0,1,fp8,fp8,0,0.0460746685663859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,float16,0,0.04984533290068308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,fp8,0,0.04822933177153269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,0,1,fp8,fp8,0,0.047930667797724404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,float16,0,0.048165331284205117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,fp8,0,0.0498986691236496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,0,1,fp8,fp8,0,0.04722133278846741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,float16,0,0.5799946784973145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,fp8,0,0.5879040161768595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,0,1,fp8,fp8,0,0.5294880072275797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,float16,0,0.04834666848182678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,float16,0,0.6071253220240275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,fp8,0,0.5954293409983317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,fp8,0,0.6015146573384603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,0,1,fp8,fp8,0,0.048751999934514366
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,0,1,fp8,fp8,0,0.5462133487065634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,float16,0,0.3422880172729492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,fp8,0,0.34041066964467365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,0,1,fp8,fp8,0,0.31251732508341473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,float16,0,0.30028265714645386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,0,1,fp8,fp8,0,0.5406613349914551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,fp8,0,0.3060479958852132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,0,1,fp8,fp8,0,0.2762986620267232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,float16,0,0.3057013352711995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,fp8,0,0.3110293348630269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,0,1,fp8,fp8,0,0.281765341758728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,float16,0,0.3129173318545024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,fp8,0,0.3195199966430664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,float16,0,0.18360000848770142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,fp8,0,0.1877546707789103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,0,1,fp8,fp8,0,0.1709173321723938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,float16,0,0.15981333454449972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,fp8,0,0.1600320041179657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,0,1,fp8,fp8,0,0.15251200397809347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,float16,0,0.16387733817100525
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,fp8,0,0.16666666666666666
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,0,1,fp8,fp8,0,0.1546026666959127
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,float16,0,0.17280532916386923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,fp8,0,0.17189333836237589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,0,1,fp8,fp8,0,0.15922666589419046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,0,1,fp8,fp8,0,0.2877440055211385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,0,1,fp8,fp8,0,0.10044266780217488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,float16,0,0.09101866682370503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,fp8,0,0.0913759966691335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,float16,0,0.6057600180308024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,float16,0,0.09135466814041138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,fp8,0,0.09105066458384196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,float16,0,0.10328533252080281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,fp8,0,0.10595200459162395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,fp8,0,0.09571733077367146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,0,1,fp8,fp8,0,0.09386666615804036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,float16,0,0.06334400177001953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,0,1,fp8,fp8,0,0.08354133367538452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,0,1,fp8,fp8,0,0.06011733412742615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,float16,0,0.06003733476003011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,0,1,fp8,fp8,0,0.08548800150553386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,fp8,0,0.060047999024391174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,0,1,fp8,fp8,0,0.05780800183614095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,float16,0,0.06052266558011373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,fp8,0,0.06187200049559275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,0,1,fp8,fp8,0,0.05659733215967814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,float16,0,0.06046399970849355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,fp8,0,0.06228800117969513
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,0,1,fp8,fp8,0,0.05829333265622457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,float16,0,0.045882667104403176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,fp8,0,0.045941332976023354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,0,1,fp8,fp8,0,0.04369066655635834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,fp8,0,0.04358933369318644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,0,1,fp8,fp8,0,0.04190933207670847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,float16,0,0.04397333165009817
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,fp8,0,0.04573333263397217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,0,1,fp8,fp8,0,0.04389866689840952
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,float16,0,0.04398400088151296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,float16,0,0.04371733466784159
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,0,1,fp8,fp8,0,0.04375466704368591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,float16,0,0.041706666350364685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,float16,0,0.09384000301361084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,fp8,0,0.0644160012404124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,fp8,0,0.04398933549722036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,float16,0,0.039690665900707245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,fp8,0,0.0417546679576238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,float16,0,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,fp8,0,0.041722665230433144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,0,1,fp8,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,float16,0,0.0418453315893809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,fp8,0,0.04004266609748205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,0,1,fp8,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,float16,0,0.5801813205083212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,0,1,fp8,fp8,0,0.039408000806967415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,0,1,fp8,fp8,0,0.5685173273086548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,float16,0,0.6032160123189291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,fp8,0,0.587551991144816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,0,1,fp8,fp8,0,0.5831946531931559
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,float16,0,0.6497439940770467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,fp8,0,0.6430879831314087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,float16,0,0.35230398178100586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,fp8,0,0.3398666779200236
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,0,1,fp8,fp8,0,0.3426133394241333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,float16,0,0.30074665943781537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,fp8,0,0.30409600337346393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,0,1,fp8,fp8,0,0.2954933245976766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,float16,0,0.312885324160258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,fp8,0,0.30986666679382324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,0,1,fp8,fp8,0,0.30057066679000854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,fp8,0,0.32843732833862305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,0,1,fp8,fp8,0,0.3137813409169515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,float16,0,0.18853867053985596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,fp8,0,0.18925867478052774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,fp8,0,0.5776106516520182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,0,1,fp8,fp8,0,0.1830293337504069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,float16,0,0.3253706693649292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,fp8,0,0.16602133711179098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,0,1,fp8,fp8,0,0.15922133127848306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,float16,0,0.17189333836237589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,fp8,0,0.16885866721471152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,0,1,fp8,fp8,0,0.16261333227157593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,float16,0,0.17563199996948242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,fp8,0,0.1827146609624227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,0,1,fp8,fp8,0,0.1692053278287252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,fp8,0,0.10294399658838908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,0,1,fp8,fp8,0,0.10431466499964397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,0,1,fp8,fp8,0,0.6031839847564697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,float16,0,0.09197866916656494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,fp8,0,0.09123733639717102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,0,1,fp8,fp8,0,0.08890666564305623
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,float16,0,0.09304533402125041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,fp8,0,0.09362666805585225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,0,1,fp8,fp8,0,0.09092799822489421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,float16,0,0.0997279981772105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,fp8,0,0.09898666540781657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,0,1,fp8,fp8,0,0.09715732932090759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,float16,0,0.06039999922116598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,fp8,0,0.05905599892139435
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,float16,0,0.058431997895240784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,float16,0,0.1648960014184316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,0,1,fp8,fp8,0,0.054133335749308266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,float16,0,0.05766933163007101
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,fp8,0,0.057855998476346336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,0,1,fp8,fp8,0,0.054048001766204834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,float16,0,0.05983466903368632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,fp8,0,0.05840533475081126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,0,1,fp8,fp8,0,0.05797333518664042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,float16,0,0.03991466760635376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,0,1,fp8,fp8,0,0.03787733366092046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,float16,0,0.03745600084463755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,fp8,0,0.03714666763941447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,0,1,fp8,fp8,0,0.03595199932654699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,float16,0,0.10716799894968669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,fp8,0,0.037871999045213066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,0,1,fp8,fp8,0,0.03745600084463755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,float16,0,0.03899200012286504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,fp8,0,0.03997333347797394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,0,1,fp8,fp8,0,0.03982399900754293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,float16,0,0.03352533280849457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,0,1,fp8,fp8,0,0.03356266766786575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,float16,0,0.03763733307520548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,0,1,fp8,fp8,0,0.06057600180308024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,0,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,fp8,0,0.05710400144259135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,fp8,0,0.03158933420976003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,0,1,fp8,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,float16,0,0.031658666829268135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,fp8,0,0.03219733387231827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,0,1,fp8,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,float16,0,0.033589333295822144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,fp8,0,0.03321066747109095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,0,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,float16,0,0.03148266673088074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,fp8,0,0.029029332101345062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,0,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,float16,0,0.029509333272775013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,0,1,fp8,fp8,0,0.027642667293548584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,float16,0,0.02993600070476532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,fp8,0,0.03130666663249334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,fp8,0,0.029802667597929638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,0,1,fp8,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,0,1,float16,float16,0,0.4959733486175537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,0,1,float16,fp8,0,0.4899413188298543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,float16,0,0.02975466599067052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,0,1,fp8,fp8,0,0.5013493299484253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,0,1,float16,float16,0,0.504741350809733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,0,1,float16,fp8,0,0.5044373273849487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,0,1,fp8,fp8,0,0.4975626468658447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,0,1,float16,float16,0,0.5409066677093506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,0,1,float16,fp8,0,0.5266293287277222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,0,1,float16,float16,0,0.30078933636347455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,0,1,fp8,fp8,0,0.5216480096181234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,0,1,float16,fp8,0,0.2937440077463786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,0,1,fp8,fp8,0,0.30089600880940753
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,0,1,float16,float16,0,0.25685866673787433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,0,1,float16,fp8,0,0.2548426588376363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,0,1,fp8,fp8,0,0.2536906599998474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,0,1,float16,float16,0,0.26422399282455444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,0,1,fp8,fp8,0,0.2592800060908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,0,1,float16,float16,0,0.2762879927953084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,0,1,float16,fp8,0,0.2781226634979248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,0,1,float16,float16,0,0.1623199979464213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,float16,0,0.029338667790095013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,0,1,float16,fp8,0,0.15742933750152588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,0,1,fp8,fp8,0,0.1586026648680369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,0,1,float16,fp8,0,0.13634133338928223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,0,1,float16,fp8,0,0.25840532779693604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,0,1,float16,float16,0,0.1434719959894816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,0,1,float16,fp8,0,0.1407786707083384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,0,1,fp8,fp8,0,0.2719893256823222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,0,1,float16,float16,0,0.14960533380508423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,0,1,float16,float16,0,0.13981866836547852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,0,1,float16,fp8,0,0.14869333306948343
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,0,1,fp8,fp8,0,0.14644799629847208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,0,1,fp8,fp8,0,0.13617066542307535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,0,1,fp8,fp8,0,0.09076799949010213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,0,1,float16,float16,0,0.07709866762161255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,0,1,float16,fp8,0,0.0766133318344752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,0,1,fp8,fp8,0,0.1386240025361379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,0,1,fp8,fp8,0,0.07266666491826375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,0,1,float16,float16,0,0.07799999912579854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,0,1,float16,fp8,0,0.07678933441638947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,0,1,fp8,fp8,0,0.07877333462238312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,0,1,float16,float16,0,0.0848640004793803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,0,1,float16,fp8,0,0.0827466646830241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,0,1,float16,float16,0,0.09131200114885966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,0,1,float16,float16,0,0.0503359983364741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,0,1,float16,fp8,0,0.08941866954167683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,0,1,fp8,fp8,0,0.05188799897829691
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,0,1,float16,float16,0,0.04842666784922282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,0,1,float16,fp8,0,0.047968000173568726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,0,1,fp8,fp8,0,0.046181331078211464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,0,1,float16,float16,0,0.04962133367856344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,0,1,float16,fp8,0,0.049423997600873314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,0,1,fp8,fp8,0,0.045706664522488914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,0,1,float16,float16,0,0.049882665276527405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,0,1,float16,fp8,0,0.05038933455944061
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,0,1,fp8,fp8,0,0.04946133494377136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,0,1,float16,float16,0,0.033615998923778534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,0,1,float16,fp8,0,0.03379733363787333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,0,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,0,1,float16,float16,0,0.0315786674618721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,0,1,float16,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,0,1,fp8,fp8,0,0.03155199935038885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,0,1,float16,float16,0,0.03161066770553589
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,0,1,float16,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,0,1,fp8,fp8,0,0.03146133323510488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,0,1,float16,float16,0,0.033615998923778534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,0,1,float16,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,0,1,fp8,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,0,1,float16,float16,0,0.027456000447273254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,0,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,0,1,fp8,fp8,0,0.02693866689999898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,0,1,float16,float16,0,0.027082666754722595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,0,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,0,1,fp8,fp8,0,0.02696000039577484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,0,1,float16,float16,0,0.025594666600227356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,0,1,float16,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,0,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,0,1,float16,float16,0,0.027674667537212372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,0,1,float16,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,0,1,fp8,fp8,0,0.027552001178264618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,0,1,float16,float16,0,0.024058667321999867
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,0,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,0,1,fp8,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,0,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,0,1,float16,fp8,0,0.02350933353106181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,0,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,0,1,float16,float16,0,0.023738667368888855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,0,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,0,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,0,1,float16,float16,0,0.023578666150569916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,0,1,float16,float16,0,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,0,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,0,1,fp8,fp8,0,0.021759999295075733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,0,1,float16,float16,0,0.021546666820844013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,0,1,fp8,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,0,1,float16,float16,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,0,1,fp8,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,0,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,0,1,float16,float16,0,0.22353599468866983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,0,1,float16,fp8,0,0.0503359983364741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,0,1,float16,fp8,0,0.2229386568069458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,0,1,fp8,fp8,0,0.23562665780385336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,0,1,float16,float16,0,0.23148800929387411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,0,1,float16,fp8,0,0.2254133423169454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,0,1,fp8,fp8,0,0.23691733678181967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,0,1,float16,float16,0,0.25142399470011395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,0,1,float16,fp8,0,0.24613332748413086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,0,1,fp8,fp8,0,0.0848640004793803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,0,1,fp8,fp8,0,0.25101866324742633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,0,1,fp8,fp8,0,0.14940800269444784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,0,1,float16,float16,0,0.11979200442632039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,0,1,float16,fp8,0,0.1179039975007375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,0,1,fp8,fp8,0,0.12422933181126912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,0,1,float16,float16,0,0.12447999914487202
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,0,1,float16,fp8,0,0.12268267075220744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,0,1,fp8,fp8,0,0.12592533230781555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,0,1,float16,float16,0,0.13556800285975137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,0,1,float16,fp8,0,0.13029332955678305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,0,1,fp8,fp8,0,0.13417067130406699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,0,1,float16,float16,0,0.0846666693687439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,0,1,float16,fp8,0,0.08187200129032135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,0,1,fp8,fp8,0,0.08528000116348267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,0,1,float16,fp8,0,0.1433013379573822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,0,1,float16,fp8,0,0.06871999800205231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,0,1,fp8,fp8,0,0.0690826674302419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,0,1,float16,float16,0,0.0703893353541692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,0,1,float16,fp8,0,0.06932266553243001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,0,1,float16,float16,0,0.07682133217652638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,0,1,float16,fp8,0,0.07493333518505096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,0,1,fp8,fp8,0,0.07965333263079326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,0,1,float16,float16,0,0.06955733398596446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,0,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,0,1,fp8,fp8,0,0.04736533264319102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,0,1,fp8,fp8,0,0.0728053351243337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,0,1,float16,float16,0,0.04196266829967499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,0,1,float16,fp8,0,0.04124800115823746
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,0,1,fp8,fp8,0,0.04159999887148539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,0,1,float16,float16,0,0.04200533529122671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,0,1,float16,fp8,0,0.04165333261092504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,0,1,fp8,fp8,0,0.041536000867684685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,0,1,float16,float16,0,0.04370133578777313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,0,1,float16,fp8,0,0.04366933306058248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,0,1,fp8,fp8,0,0.043680002291997276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,0,1,float16,float16,0,0.029311999678611755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,0,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,0,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,0,1,float16,float16,0,0.029493334392706554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,0,1,float16,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,0,1,float16,float16,0,0.027669332921504974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,0,1,float16,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,0,1,float16,float16,0,0.029296000798543293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,0,1,float16,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,0,1,fp8,fp8,0,0.03129599988460541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,0,1,float16,float16,0,0.024847999215126038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,0,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,0,1,float16,float16,0,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,0,1,fp8,fp8,0,0.021744000415007275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,0,1,float16,float16,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,0,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,0,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,0,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,0,1,float16,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,0,1,fp8,fp8,0,0.02478933334350586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,0,1,float16,float16,0,0.04401599864164988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,0,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,0,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,0,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,0,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,0,1,float16,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,0,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,0,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,0,1,float16,float16,0,0.1458186705907186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,0,1,fp8,fp8,0,0.02042666698495547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,0,1,float16,float16,0,0.01958400011062622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,0,1,float16,fp8,0,0.01978133370478948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,0,1,float16,fp8,0,0.02006400004029274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,0,1,float16,float16,0,0.018874666343132656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,0,1,float16,fp8,0,0.020597333709398907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,0,1,fp8,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,0,1,float16,float16,0,0.12897599736849466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,0,1,float16,fp8,0,0.12825600306193033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,0,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,0,1,fp8,fp8,0,0.13301866253217062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,0,1,float16,float16,0,0.13210133711496988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,0,1,float16,fp8,0,0.12917866309483847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,0,1,fp8,fp8,0,0.13666133085886636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,0,1,float16,float16,0,0.14029332995414734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,0,1,fp8,fp8,0,0.1437120040257772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,0,1,float16,float16,0,0.08434666196505229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,0,1,float16,fp8,0,0.08182933429876964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,0,1,fp8,fp8,0,0.0876586635907491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,0,1,float16,fp8,0,0.07283733288447063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,0,1,fp8,fp8,0,0.07273600002129872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,0,1,float16,float16,0,0.07503999769687653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,0,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,0,1,float16,fp8,0,0.07277866701285045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,0,1,fp8,fp8,0,0.0765173335870107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,0,1,float16,float16,0,0.07671999931335449
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,0,1,float16,fp8,0,0.07703466713428497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,0,1,fp8,fp8,0,0.08122133215268452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,0,1,float16,float16,0,0.07262933254241943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,0,1,float16,fp8,0,0.045834665497144066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,0,1,fp8,fp8,0,0.04764799773693085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,0,1,float16,float16,0,0.04438399771849314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,0,1,float16,fp8,0,0.04390400151411692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,0,1,fp8,fp8,0,0.04391466577847799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,0,1,float16,float16,0,0.04515733321507772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,0,1,float16,fp8,0,0.045456002155939736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,0,1,fp8,fp8,0,0.04359466830889384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,0,1,float16,float16,0,0.04624533156553904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,0,1,float16,fp8,0,0.043925335009892784
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,0,1,fp8,fp8,0,0.047210668524106346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,0,1,float16,float16,0,0.03127466638882955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,0,1,float16,fp8,0,0.031178665657838184
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,0,1,fp8,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,0,1,float16,float16,0,0.0295413335164388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,0,1,float16,fp8,0,0.029605334003766377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,0,1,fp8,fp8,0,0.030858665704727173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,0,1,float16,float16,0,0.030960001051425934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,0,1,float16,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,0,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,0,1,float16,float16,0,0.029824001093705494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,0,1,float16,fp8,0,0.029706666866938274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,0,1,fp8,fp8,0,0.03120533376932144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,0,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,0,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,0,1,float16,float16,0,0.021183999876181286
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,0,1,fp8,fp8,0,0.021615999440352123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,0,1,float16,float16,0,0.021344001094500225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,0,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,0,1,fp8,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,0,1,float16,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,0,1,float16,float16,0,0.045994664231936135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,0,1,float16,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,0,1,float16,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,0,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,0,1,float16,fp8,0,0.1362399955590566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,0,1,fp8,fp8,0,0.015722667177518208
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,0,1,fp8,fp8,0,0.01602666700879733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,0,1,fp8,fp8,0,0.016016000260909397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,0,1,float16,fp8,0,0.0909440020720164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,0,1,float16,float16,0,0.09125333031018575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,0,1,fp8,fp8,0,0.08866133292516072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,0,1,float16,float16,0,0.09300266702969869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,0,1,float16,fp8,0,0.09114666779836018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,0,1,fp8,fp8,0,0.09142933289210002
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,0,1,float16,float16,0,0.09333866834640503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,0,1,float16,fp8,0,0.09317866961161296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,0,1,fp8,fp8,0,0.0974133312702179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,0,1,float16,fp8,0,0.05409066875775655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,0,1,fp8,fp8,0,0.05851200222969055
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,0,1,float16,float16,0,0.05202666421731313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,0,1,fp8,fp8,0,0.050101334849993386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,0,1,float16,float16,0,0.051962668697039284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,0,1,float16,fp8,0,0.04995200037956238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,0,1,fp8,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,0,1,float16,float16,0,0.05208000044027964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,0,1,float16,fp8,0,0.0521919975678126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,0,1,float16,float16,0,0.05412800113360087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,0,1,float16,float16,0,0.03558400024970373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,0,1,float16,fp8,0,0.0352906659245491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,0,1,fp8,fp8,0,0.03569599986076355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,0,1,float16,float16,0,0.03474666674931844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,0,1,float16,fp8,0,0.035487999518712364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,0,1,fp8,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,0,1,float16,float16,0,0.03548266738653183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,0,1,float16,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,0,1,fp8,fp8,0,0.03348266581694285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,0,1,float16,float16,0,0.03544000039498011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,0,1,float16,fp8,0,0.03537066777547201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,0,1,fp8,fp8,0,0.03533866753180822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,0,1,float16,float16,0,0.025279998779296875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,0,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,0,1,float16,float16,0,0.02521066615978877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,0,1,fp8,fp8,0,0.05235733091831207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,0,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,0,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,0,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,0,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,0,1,float16,float16,0,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,0,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,0,1,float16,float16,0,0.016586666305859882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,0,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,0,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,0,1,float16,fp8,0,0.023578666150569916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,0,1,float16,fp8,0,0.015989333391189575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,0,1,fp8,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,0,1,float16,float16,0,0.015781333049138386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,0,1,float16,fp8,0,0.016309333344300587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,0,1,float16,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,0,1,float16,fp8,0,0.052005335688591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,0,1,float16,fp8,0,0.01603200038274129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,0,1,float16,float16,0,0.0685280015071233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,0,1,float16,fp8,0,0.06871999800205231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,0,1,fp8,fp8,0,0.06657599906126659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,0,1,float16,float16,0,0.06883733471234639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,0,1,float16,fp8,0,0.06856533388296764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,0,1,fp8,fp8,0,0.0674239993095398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,0,1,float16,float16,0,0.07069866855939229
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,0,1,float16,fp8,0,0.07074133555094402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,0,1,fp8,fp8,0,0.07088000078996022
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,0,1,float16,float16,0,0.01766933376590411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,0,1,float16,fp8,0,0.04257066547870636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,0,1,fp8,fp8,0,0.04490133126576742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,0,1,float16,float16,0,0.042490666111310325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,0,1,float16,fp8,0,0.0439626673857371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,0,1,float16,float16,0,0.04426133135954539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,0,1,float16,fp8,0,0.04215999941031138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,0,1,fp8,fp8,0,0.04193066557248434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,0,1,float16,float16,0,0.04379733403523763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,0,1,float16,fp8,0,0.0436160018046697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,0,1,fp8,fp8,0,0.044112001856168113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,0,1,fp8,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,0,1,float16,float16,0,0.029274667302767437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,0,1,float16,fp8,0,0.029552000264326733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,0,1,float16,float16,0,0.029493334392706554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,0,1,float16,fp8,0,0.0276853342851003
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,0,1,fp8,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,0,1,float16,float16,0,0.028575999041398365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,0,1,float16,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,0,1,fp8,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,0,1,float16,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,0,1,float16,float16,0,0.021509334444999695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,0,1,fp8,fp8,0,0.041696002086003624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,0,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,0,1,fp8,fp8,0,0.02162666618824005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,0,1,float16,fp8,0,0.018058666338523228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,0,1,float16,float16,0,0.016688000410795212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,0,1,float16,float16,0,0.04399999976158142
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,0,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,0,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,0,1,float16,float16,0,0.01589866727590561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,0,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,0,1,float16,float16,0,0.015674666812022526
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,0,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,0,1,float16,float16,0,0.06042666733264923
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,0,1,float16,fp8,0,0.06000000238418579
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,0,1,fp8,fp8,0,0.06032533446947733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,0,1,float16,float16,0,0.060309335589408875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,0,1,float16,fp8,0,0.06057600180308024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,0,1,fp8,fp8,0,0.059903999169667564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,0,1,float16,float16,0,0.06047999858856201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,0,1,float16,fp8,0,0.06132266422112783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,0,1,fp8,fp8,0,0.06230400005976359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,0,1,float16,float16,0,0.03754666695992152
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,0,1,float16,fp8,0,0.03898133337497711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,0,1,fp8,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,0,1,float16,float16,0,0.03738666574160258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,0,1,float16,fp8,0,0.037861332297325134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,0,1,fp8,fp8,0,0.03740799923737844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,0,1,float16,float16,0,0.03739733248949051
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,0,1,float16,fp8,0,0.03757333258787791
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,0,1,fp8,fp8,0,0.03589333345492681
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,0,1,float16,float16,0,0.03766933331886927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,0,1,float16,fp8,0,0.037418665985266365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,0,1,fp8,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,0,1,float16,float16,0,0.025802666942278545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,0,1,float16,float16,0,0.015562667200962702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,0,1,fp8,fp8,0,0.02553066611289978
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,0,1,float16,float16,0,0.02608533451954524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,0,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,0,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,0,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,0,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,0,1,fp8,fp8,0,0.025749333202838898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,0,1,float16,float16,0,0.02534399926662445
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,0,1,fp8,fp8,0,0.026885333160559338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,0,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,0,1,float16,float16,0,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,0,1,fp8,fp8,0,0.02090666691462199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,0,1,float16,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,0,1,fp8,fp8,0,0.019839999576409657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,0,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,0,1,fp8,fp8,0,0.015935999651749928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,0,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,0,1,float16,float16,0,0.016314666718244553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,0,1,fp8,fp8,0,0.01664000004529953
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,0,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,0,1,float16,float16,0,0.01594666639963786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,0,1,float16,float16,0,0.016271999726692837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,0,1,float16,float16,0,0.01571200042963028
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,0,1,float16,float16,0,0.0162773331006368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,0,1,float16,fp8,0,0.026890667776266735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,0,0.05253333350022634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,0,0.054117331902186074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,1,128,0,1,fp8,fp8,0,0.051728000243504844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,0,0.052069331208864846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,128,0,1,fp8,fp8,0,0.05016533533732096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,0,0.05218133330345154
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,128,0,1,fp8,fp8,0,0.051872000098228455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,0,0.0334346666932106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,128,0,1,fp8,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,0,0.03398933261632919
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,0,0.03387200087308884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,0,0.052416001756985985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,1,128,0,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,128,0,1,fp8,fp8,0,0.03198933353026708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,0,0.03374933451414108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,4,128,0,1,fp8,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,8,128,0,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,1,128,0,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,2,128,0,1,fp8,fp8,0,0.02497066557407379
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,8,4,128,0,1,fp8,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,8,128,0,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,0,0.02187199890613556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,1,128,0,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,0,0.03366933266321818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,128,0,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,4,128,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,0,0.016688000410795212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,8,128,0,1,fp8,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,0,0.05216533442338308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,1,128,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,2,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,0,0.016757333030303318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,8,4,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,8,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,1,128,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,0,0.015450666348139444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,2,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,0,0.03366933266321818
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,0,0.015882667154073715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,8,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,1,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,0,0.02149333308140437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,128,0,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,8,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,1,128,0,1,fp8,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,4,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,8,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,1,128,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,128,0,1,fp8,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,4,128,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,float16,0,1.7667892773946126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,fp8,0,1.70088529586792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,0,1,fp8,fp8,0,1.5049120585123699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,float16,0,1.7857066790262859
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,fp8,0,1.8462986946105957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,0,1,fp8,fp8,0,1.517936070760091
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,float16,0,1.0914773146311443
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,fp8,0,0.9363733132680258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,0,1,fp8,fp8,0,0.9871359666188558
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,fp8,0,0.933568000793457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,0,1,fp8,fp8,0,0.8255519866943359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,float16,0,0.932581345240275
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,fp8,0,0.9450826644897461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,float16,0,0.56495467821757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,fp8,0,0.5581226746241251
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,float16,0,0.8957760334014893
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,float16,0,0.5345173279444376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,fp8,0,0.5252480109532675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,0,1,fp8,fp8,0,0.4814720153808594
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,float16,0,0.5323253472646078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,fp8,0,0.5586400032043457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,0,1,fp8,fp8,0,0.487338662147522
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,0,1,fp8,fp8,0,0.4934186538060506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,float16,0,0.3507839838663737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,fp8,0,0.3476853370666504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,0,1,fp8,fp8,0,0.31805866956710815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,float16,0,0.34644798437754315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,fp8,0,0.3434773286183675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,0,1,fp8,fp8,0,0.3129120071729024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,float16,0,0.3472906748453776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,fp8,0,0.3442026774088542
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,0,1,fp8,fp8,0,0.3145866592725118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,float16,0,1.0673279762268066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,0,1,fp8,fp8,0,0.9235893090566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,float16,0,1.0579573313395183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,fp8,0,1.042453368504842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,0,1,fp8,fp8,0,0.8308959801991781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,0,1,fp8,fp8,0,0.9350132942199707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,fp8,0,0.5890186627705892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,fp8,0,1.0213013490041096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,0,1,fp8,fp8,0,0.532965342203776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,float16,0,0.5723359982172648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,fp8,0,0.5689706802368164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,0,1,fp8,fp8,0,0.5185439984003702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,float16,0,0.573312004407247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,fp8,0,0.5756160020828247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,0,1,fp8,fp8,0,0.5232853492101034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,fp8,0,0.3487306833267212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,float16,0,0.5885226726531982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,float16,0,0.3381280104319255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,fp8,0,0.343610684076945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,0,1,fp8,fp8,0,0.3099466760953267
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,float16,0,0.3444639841715495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,fp8,0,0.3408159812291463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,0,1,fp8,fp8,0,0.31305599212646484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,float16,0,0.2246346672375997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,float16,0,0.3545866807301839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,0,1,fp8,fp8,0,0.20894932746887207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,0,1,fp8,fp8,0,0.32204800844192505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,fp8,0,0.22549333175023398
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,0,1,fp8,fp8,0,0.20678933461507162
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,float16,0,0.22457599639892578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,fp8,0,0.22470400730768839
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,fp8,0,0.22654932737350464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,0,1,fp8,fp8,0,0.2064639925956726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,float16,0,0.22466667493184408
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,float16,0,0.763808012008667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,fp8,0,0.7499306996663412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,0,1,fp8,fp8,0,0.684607982635498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,float16,0,0.7772640387217203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,fp8,0,0.757472038269043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,0,1,fp8,fp8,0,0.6915466785430908
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,float16,0,0.4402933518091838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,fp8,0,0.4388586680094401
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,0,1,fp8,fp8,0,0.4019999901453654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,float16,0,0.4333920081456502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,fp8,0,0.438591996828715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,0,1,fp8,fp8,0,0.3881066640218099
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,float16,0,0.447221318880717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,0,1,fp8,fp8,0,0.3940800031026204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,float16,0,0.28128000100453693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,fp8,0,0.2706933418909709
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,0,1,fp8,fp8,0,0.2518986662228902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,float16,0,0.26945066452026367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,fp8,0,0.2692799965540568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,0,1,fp8,fp8,0,0.2405866583188375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,float16,0,0.2733546694119771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,fp8,0,0.2701493302981059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,0,1,fp8,fp8,0,0.2443466583887736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,float16,0,0.17552000284194946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,fp8,0,0.17939732472101846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,0,1,fp8,fp8,0,0.15863466262817383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,float16,0,0.17563732465108237
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,fp8,0,0.16724266608556113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,0,1,fp8,fp8,0,0.1546880006790161
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,fp8,0,0.4352000157038371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,fp8,0,0.16771199305852255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,0,1,fp8,fp8,0,0.1569653352101644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,float16,0,0.9761173725128174
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,fp8,0,0.9884533087412516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,float16,0,0.9683039983113607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,fp8,0,0.9931413332621256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,float16,0,0.169429341952006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,float16,0,0.5589919884999593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,fp8,0,0.5702133178710938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,0,1,fp8,fp8,0,0.4987573226292928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,0,1,fp8,fp8,0,0.8798613548278809
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,float16,0,0.5382826725641886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,fp8,0,0.5285226504007975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,0,1,fp8,fp8,0,0.47893865903218585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,float16,0,0.5459146499633789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,fp8,0,0.5382773478825887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,float16,0,0.3227733373641968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,fp8,0,0.3221333424250285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,0,1,fp8,fp8,0,0.29154666264851886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,float16,0,0.30506666501363117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,fp8,0,0.31382934252421063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,0,1,fp8,fp8,0,0.27907200654347736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,float16,0,0.3161919911702474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,fp8,0,0.3104959925015767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,0,1,fp8,fp8,0,0.2845173279444377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,float16,0,0.19955732425053915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,0,1,fp8,fp8,0,0.4863893191019694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,0,1,fp8,fp8,0,0.18545067310333252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,float16,0,0.19585599501927695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,fp8,0,0.2070186734199524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,0,1,fp8,fp8,0,0.17777599891026816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,float16,0,0.19855999946594238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,fp8,0,0.19839467604955038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,0,1,fp8,fp8,0,0.17940799395243326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,fp8,0,0.1302293340365092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,0,1,fp8,fp8,0,0.12460800011952718
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,fp8,0,0.20435200134913126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,float16,0,0.1276479959487915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,fp8,0,0.1281706690788269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,0,1,fp8,fp8,0,0.12195199728012085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,float16,0,0.1284160017967224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,fp8,0,0.12819733222325644
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,0,1,fp8,fp8,0,0.12381866574287415
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,float16,0,0.12826133767763773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,fp8,0,0.6164213418960571
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,0,1,fp8,fp8,0,0.55731733640035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,float16,0,0.6367893218994141
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,0,1,fp8,fp8,0,0.8925920327504476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,0,1,fp8,fp8,0,0.5667466719945272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,float16,0,0.35622934500376385
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,fp8,0,0.36510932445526123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,0,1,fp8,fp8,0,0.32520532608032227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,float16,0,0.606053352355957
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,float16,0,0.3466026782989502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,fp8,0,0.3399413426717122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,0,1,fp8,fp8,0,0.31014400720596313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,float16,0,0.35463468233744305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,fp8,0,0.3503893216451009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,0,1,fp8,fp8,0,0.3145013252894084
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,float16,0,0.21300800641377768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,fp8,0,0.21418132384618124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,float16,0,0.2118933399518331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,fp8,0,0.1999946633974711
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,0,1,fp8,fp8,0,0.18133334318796793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,float16,0,0.2081973354021708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,fp8,0,0.2076266606648763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,0,1,fp8,fp8,0,0.18708799282709757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,float16,0,0.13289067149162292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,fp8,0,0.6284319957097372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,fp8,0,0.13486933708190918
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,0,1,fp8,fp8,0,0.12244266271591187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,float16,0,0.13176533579826355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,fp8,0,0.12928533554077148
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,0,1,fp8,fp8,0,0.1200320025285085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,float16,0,0.13105066617329916
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,fp8,0,0.1328053375085195
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,0,1,fp8,fp8,0,0.12181333700815837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,float16,0,0.10012267033259074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,fp8,0,0.10122133294741313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,0,1,fp8,fp8,0,0.09697600205739339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,float16,0,0.09962667028109233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,fp8,0,0.10125866532325745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,0,1,fp8,fp8,0,0.19389333327611288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,float16,0,0.10136533776919048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,fp8,0,0.10045866171518962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,0,1,fp8,fp8,0,0.09692266583442688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,float16,0,0.6313653389612833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,fp8,0,0.6429813305536906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,0,1,fp8,fp8,0,0.5655253330866495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,float16,0,0.6706879933675131
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,fp8,0,0.645962675412496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,0,1,fp8,fp8,0,0.5781759818394979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,float16,0,0.36452798048655194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,fp8,0,0.36419200897216797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,0,1,fp8,fp8,0,0.32450666030248004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,float16,0,0.3361920118331909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,fp8,0,0.3411946694056193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,0,1,fp8,fp8,0,0.3057653307914734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,float16,0,0.34143467744191486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,fp8,0,0.3553973436355591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,0,1,fp8,fp8,0,0.31221334139506024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,float16,0,0.20443199078241983
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,fp8,0,0.2102666695912679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,0,1,fp8,fp8,0,0.1886720061302185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,float16,0,0.1907786726951599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,fp8,0,0.19290133317311606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,0,1,fp8,fp8,0,0.17507733901341757
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,float16,0,0.20472532510757446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,fp8,0,0.1987733244895935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,0,1,fp8,fp8,0,0.17909866571426392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,float16,0,0.12238400181134541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,fp8,0,0.1234879990418752
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,0,1,fp8,fp8,0,0.11773332953453064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,float16,0,0.11847466230392456
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,fp8,0,0.12046933174133301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,0,1,fp8,fp8,0,0.1088800032933553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,float16,0,0.1199679970741272
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,fp8,0,0.12133333086967468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,0,1,fp8,fp8,0,0.11055466532707214
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,fp8,0,0.08061866462230682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,0,1,fp8,fp8,0,0.07656533519426982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,float16,0,0.0784746656815211
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,fp8,0,0.07798400024573009
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,0,1,fp8,fp8,0,0.07321066657702129
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,float16,0,0.07716266810894012
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,fp8,0,0.07838933169841766
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,0,1,fp8,fp8,0,0.07458133498827617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,float16,0,0.0727893312772115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,fp8,0,0.07291199763615926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,0,1,fp8,fp8,0,0.07051200171311696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,float16,0,0.07262399792671204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,fp8,0,0.07250133156776428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,0,1,fp8,fp8,0,0.06862399975458781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,float16,0,0.07247999807198842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,fp8,0,0.0724373310804367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,0,1,fp8,fp8,0,0.07036800185839336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,float16,0,0.41332801183064777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,fp8,0,0.4278506835301717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,0,1,fp8,fp8,0,0.37748265266418457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,float16,0,0.4324479897816976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,fp8,0,0.4263840119043986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,0,1,fp8,fp8,0,0.39557866255442303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,float16,0,0.2446826696395874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,fp8,0,0.2523253361384074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,0,1,fp8,fp8,0,0.22457599639892578
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,float16,0,0.2318399945894877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,fp8,0,0.22689600785573324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,0,1,fp8,fp8,0,0.20791999499003092
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,float16,0,0.23409066597620645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,fp8,0,0.23881065845489502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,0,1,fp8,fp8,0,0.09518399834632874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,float16,0,0.13798933227856955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,fp8,0,0.13993066549301147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,0,1,fp8,fp8,0,0.13157866398493448
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,float16,0,0.1256586710611979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,fp8,0,0.13327999909718832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,float16,0,0.07901866734027863
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,float16,0,0.13053866227467856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,fp8,0,0.13032533725102743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,0,1,fp8,fp8,0,0.12451733152071635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,float16,0,0.08312533299128215
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,fp8,0,0.08587732911109924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,0,1,fp8,fp8,0,0.08091199894746144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,float16,0,0.08265066643555959
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,0,1,fp8,fp8,0,0.2137440045674642
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,0,1,fp8,fp8,0,0.07648533085982005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,float16,0,0.08260266482830048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,fp8,0,0.08447466293970744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,0,1,fp8,fp8,0,0.07865066826343536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,float16,0,0.06230400005976359
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,fp8,0,0.06296533346176147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,0,1,fp8,fp8,0,0.06041066845258077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,fp8,0,0.08276266853014629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,fp8,0,0.062352001667022705
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,0,1,fp8,fp8,0,0.05866133173306783
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,float16,0,0.06259733438491821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,fp8,0,0.06411199768384297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,0,1,fp8,fp8,0,0.060234665870666504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,float16,0,0.05872533222039541
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,fp8,0,0.06016000111897787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,float16,0,0.06221333146095276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,fp8,0,0.05864533285299937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,0,1,fp8,fp8,0,0.05604266623655955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,float16,0,0.05834666887919108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,fp8,0,0.0581226646900177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,0,1,fp8,fp8,0,0.05618133147557577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,float16,0,0.47115198771158856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,float16,0,0.05849599838256836
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,fp8,0,0.47439467906951904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,0,1,fp8,fp8,0,0.43083735307057697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,0,1,fp8,fp8,0,0.11724266409873962
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,fp8,0,0.4960533380508423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,0,1,fp8,fp8,0,0.4361813465754191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,float16,0,0.2744373281796773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,fp8,0,0.27127466599146527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,0,1,fp8,fp8,0,0.25149865945180255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,0,1,fp8,fp8,0,0.05738666653633118
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,0,1,fp8,fp8,0,0.22902933756510416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,float16,0,0.25880000988642377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,fp8,0,0.25971200068791706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,0,1,fp8,fp8,0,0.23433599869410196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,float16,0,0.1513706644376119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,fp8,0,0.25356799364089966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,fp8,0,0.1525226632754008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,0,1,fp8,fp8,0,0.1410719950993856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,float16,0,0.4812693198521932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,float16,0,0.13223466277122498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,0,1,fp8,fp8,0,0.12618666887283325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,float16,0,0.1405119995276133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,fp8,0,0.14144532879193625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,0,1,fp8,fp8,0,0.1322986682256063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,float16,0,0.08387733499209087
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,fp8,0,0.08494933446248372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,0,1,fp8,fp8,0,0.0825973351796468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,float16,0,0.07858133316040039
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,fp8,0,0.08098666866620381
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,0,1,fp8,fp8,0,0.07467199862003326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,float16,0,0.08095466593901317
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,fp8,0,0.0824480007092158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,0,1,fp8,fp8,0,0.07673599819342296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,float16,0,0.054101333022117615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,fp8,0,0.054245332876841225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,0,1,fp8,fp8,0,0.0521066685517629
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,float16,0,0.05182399849096934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,fp8,0,0.05235733091831207
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,0,1,fp8,fp8,0,0.04970133304595947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,float16,0,0.05434666574001312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,fp8,0,0.05208000044027964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,0,1,fp8,fp8,0,0.051781331499417625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,float16,0,0.04598933458328247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,fp8,0,0.04818666477998098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,float16,0,0.24810133377710977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,fp8,0,0.04601066807905833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,0,1,fp8,fp8,0,0.04584533472855886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,fp8,0,0.13134933511416116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,float16,0,0.04819199939568838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,fp8,0,0.048021331429481506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,0,1,fp8,fp8,0,0.04388799766699473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,float16,0,0.04567466676235199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,fp8,0,0.043605332573254905
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,0,1,fp8,fp8,0,0.04378133515516917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,float16,0,0.04404266675313314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,fp8,0,0.04398933549722036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,0,1,fp8,fp8,0,0.04168533285458883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,0,1,fp8,fp8,0,0.045647998650868736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,fp8,0,0.04468800127506256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,0,1,fp8,fp8,0,0.04331733286380768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,float16,0,0.314522663752238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,fp8,0,0.3153546651204427
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,0,1,fp8,fp8,0,0.28484266996383667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,float16,0,0.32374932368596393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,fp8,0,0.32425065835316974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,0,1,fp8,fp8,0,0.29340799649556476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,float16,0,0.18521066506703696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,fp8,0,0.1888320048650106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,float16,0,0.04382933179537455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,float16,0,0.047728002071380615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,fp8,0,0.17157334089279175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,0,1,fp8,fp8,0,0.1569439967473348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,float16,0,0.17682133118311563
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,fp8,0,0.17685866355895996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,0,1,fp8,fp8,0,0.16195733348528543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,float16,0,0.10358400146166484
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,fp8,0,0.10577066739400227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,0,1,fp8,fp8,0,0.10143466790517171
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,float16,0,0.09199466307957967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,0,1,fp8,fp8,0,0.1731839974721273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,float16,0,0.16857065757115683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,0,1,fp8,fp8,0,0.08801600337028503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,float16,0,0.09539199868837993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,fp8,0,0.09599467118581136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,0,1,fp8,fp8,0,0.09303466478983562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,float16,0,0.062133332093556724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,fp8,0,0.06266666452089946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,0,1,fp8,fp8,0,0.06058133145173391
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,float16,0,0.05927466849486033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,fp8,0,0.06012799839178721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,float16,0,0.059989333152770996
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,fp8,0,0.06247466802597046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,fp8,0,0.09511466821034749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,0,1,fp8,fp8,0,0.05797866483529409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,float16,0,0.044639999667803444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,fp8,0,0.04394666850566864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,0,1,fp8,fp8,0,0.04362666606903076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,float16,0,0.042805333932240806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,fp8,0,0.044256001710891724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,0,1,fp8,fp8,0,0.04107200105985006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,float16,0,0.04394133388996124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,fp8,0,0.043840001026789345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,0,1,fp8,fp8,0,0.041738669077555336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,fp8,0,0.039546666045983635
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,0,1,fp8,fp8,0,0.03803733239571253
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,float16,0,0.03948266555865606
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,fp8,0,0.03945599993069967
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,0,1,fp8,fp8,0,0.03773866593837738
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,float16,0,0.039733332892258964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,fp8,0,0.0395413339138031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,0,1,fp8,fp8,0,0.03942933430274328
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,float16,0,0.03743999948104223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,fp8,0,0.03792533278465271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,0,1,fp8,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,float16,0,0.03743999948104223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,fp8,0,0.037791999677817024
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,0,1,fp8,fp8,0,0.037151999771595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,float16,0,0.0374293327331543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,fp8,0,0.03793066740036011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,0,1,fp8,fp8,0,0.03576533248027166
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,0,1,fp8,fp8,0,0.05600533386071523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,float16,0,0.32442667086919147
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,fp8,0,0.3155413269996643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,0,1,fp8,fp8,0,0.32391466697057086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,float16,0,0.3337973356246948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,0,1,fp8,fp8,0,0.3286079963048299
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,float16,0,0.1898826758066813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,fp8,0,0.18499199549357095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,0,1,fp8,fp8,0,0.18442134062449136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,float16,0,0.17133333285649618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,fp8,0,0.17149333159128824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,0,1,fp8,fp8,0,0.17135467131932577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,float16,0,0.1788640022277832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,fp8,0,0.17299733559290567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,0,1,fp8,fp8,0,0.17648533980051676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,float16,0,0.10619200269381206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,fp8,0,0.3227413296699524
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,0,1,fp8,fp8,0,0.10544533530871074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,float16,0,0.09109333157539368
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,fp8,0,0.09243733684221904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,0,1,fp8,fp8,0,0.09347732861836751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,float16,0,0.09717333316802979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,fp8,0,0.09515200058619182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,0,1,fp8,fp8,0,0.09897067149480183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,float16,0,0.04008533308903376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,fp8,0,0.059445331494013466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,0,1,fp8,fp8,0,0.06131199995676676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,fp8,0,0.05888533095518748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,0,1,fp8,fp8,0,0.05498133103052775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,float16,0,0.058448001742362976
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,fp8,0,0.05706666906674703
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,0,1,fp8,fp8,0,0.056048000852266945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,float16,0,0.039813332259655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,fp8,0,0.040234667559464775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,float16,0,0.05829333265622457
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,0,1,fp8,fp8,0,0.03808533400297165
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,float16,0,0.037861332297325134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,0,1,fp8,fp8,0,0.03740799923737844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,fp8,0,0.10643200079600017
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,0,1,fp8,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,float16,0,0.03134933362404505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,fp8,0,0.033930666744709015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,0,1,fp8,fp8,0,0.031557333966096245
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,float16,0,0.03151999910672506
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,0,1,fp8,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,float16,0,0.031248000760873158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,0,1,fp8,fp8,0,0.031162666777769726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,float16,0,0.02951466788848241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,fp8,0,0.0295413335164388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,0,1,fp8,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,float16,0,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,float16,0,0.038015998899936676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,float16,0,0.029946667452653248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,0,1,fp8,fp8,0,0.029546665648619335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,float16,0,0.027850667635599773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,float16,0,0.060496002435684204
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,fp8,0,0.02810666710138321
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,0,1,fp8,fp8,0,0.02762666592995326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,float16,0,0.029338667790095013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,0,1,fp8,fp8,0,0.027653334041436512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,float16,0,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,0,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,0,1,float16,float16,0,0.2675199906031291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,0,1,fp8,fp8,0,0.2815893292427063
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,0,1,float16,float16,0,0.2788800001144409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,0,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,0,1,float16,fp8,0,0.27476799488067627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,0,1,fp8,fp8,0,0.2866080005963643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,0,1,fp8,fp8,0,0.16120533148447672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,0,1,float16,float16,0,0.1409226655960083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,0,1,float16,fp8,0,0.14028799533843994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,0,1,fp8,fp8,0,0.15064533551534018
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,0,1,float16,float16,0,0.14783466855684915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,0,1,float16,float16,0,0.1627253293991089
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,0,1,float16,fp8,0,0.1453439990679423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,0,1,fp8,fp8,0,0.15377066532770792
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,0,1,float16,fp8,0,0.09073066711425781
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,0,1,fp8,fp8,0,0.09366400043169658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,0,1,float16,float16,0,0.07692799965540568
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,0,1,float16,fp8,0,0.0766293356815974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,0,1,fp8,fp8,0,0.08103999992211659
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,0,1,float16,fp8,0,0.1634719967842102
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,0,1,float16,fp8,0,0.07906666894753774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,0,1,fp8,fp8,0,0.08571733037630717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,0,1,float16,float16,0,0.050106664498647056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,0,1,float16,fp8,0,0.050000001986821495
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,0,1,fp8,fp8,0,0.05219733218352
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,0,1,float16,float16,0,0.048058668772379555
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,0,1,float16,fp8,0,0.04821866750717163
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,0,1,float16,fp8,0,0.2609493335088094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,0,1,float16,float16,0,0.04814399778842926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,0,1,float16,fp8,0,0.048138668139775596
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,0,1,fp8,fp8,0,0.04837866624196371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,0,1,float16,float16,0,0.033701332906881966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,0,1,float16,fp8,0,0.03140799949566523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,0,1,fp8,fp8,0,0.033200000723203026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,0,1,float16,float16,0,0.03177600105603536
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,0,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,0,1,fp8,fp8,0,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,0,1,fp8,fp8,0,0.030053332448005676
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,0,1,float16,float16,0,0.03153600047032038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,0,1,float16,fp8,0,0.03140799949566523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,0,1,fp8,fp8,0,0.03192000091075897
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,0,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,0,1,float16,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,0,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,0,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,0,1,fp8,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,0,1,float16,float16,0,0.027295999228954315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,0,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,0,1,fp8,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,0,1,float16,float16,0,0.02342933416366577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,0,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,0,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,0,1,float16,float16,0,0.023242667317390442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,0,1,float16,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,0,1,fp8,fp8,0,0.02293333411216736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,0,1,float16,float16,0,0.023472001155217487
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,0,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,0,1,fp8,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,0,1,float16,float16,0,0.022101332743962605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,0,1,float16,fp8,0,0.023541333774725597
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,0,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,0,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,0,1,float16,float16,0,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,0,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,0,1,fp8,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,0,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,0,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,0,1,float16,float16,0,0.023599999646345775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,0,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,0,1,fp8,fp8,0,0.021514666577180225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,0,1,float16,float16,0,0.12475732962290446
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,0,1,float16,fp8,0,0.12359999616940816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,0,1,fp8,fp8,0,0.13844799995422363
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,0,1,float16,float16,0,0.1305333375930786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,0,1,float16,fp8,0,0.12705600261688232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,0,1,fp8,fp8,0,0.14447999993960062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,0,1,float16,float16,0,0.08323733508586884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,0,1,float16,fp8,0,0.08242133259773254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,0,1,fp8,fp8,0,0.08554133772850037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,0,1,float16,float16,0,0.07056533296902974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,0,1,float16,fp8,0,0.0699893335501353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,0,1,fp8,fp8,0,0.07212799787521362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,0,1,float16,float16,0,0.07254933317502339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,0,1,float16,fp8,0,0.0711946686108907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,0,1,fp8,fp8,0,0.08215466638406117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,0,1,float16,float16,0,0.04404800136884054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,0,1,float16,float16,0,0.09165333708127339
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,0,1,float16,fp8,0,0.04388799766699473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,0,1,fp8,fp8,0,0.04799466828505198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,0,1,float16,float16,0,0.041840001940727234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,0,1,float16,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,0,1,fp8,fp8,0,0.04051200052102407
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,0,1,float16,float16,0,0.04189866781234741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,0,1,float16,fp8,0,0.04187199970086416
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,0,1,fp8,fp8,0,0.04362666606903076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,0,1,float16,float16,0,0.029887999097506206
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,0,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,0,1,fp8,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,0,1,float16,float16,0,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,0,1,float16,float16,0,0.08191466828187306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,0,1,float16,float16,0,0.02951466788848241
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,0,1,float16,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,0,1,fp8,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,0,1,float16,float16,0,0.02587199956178665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,0,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,0,1,float16,float16,0,0.023215999205907185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,0,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,0,1,float16,float16,0,0.02083733429511388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,0,1,float16,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,0,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,0,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,0,1,float16,fp8,0,0.02160533269246419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,0,1,float16,float16,0,0.019999999552965164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,0,1,float16,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,0,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,0,1,float16,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,0,1,float16,fp8,0,0.019776000330845516
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,0,1,float16,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,0,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,0,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,0,1,float16,float16,0,0.07247466842333476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,0,1,float16,fp8,0,0.07266666491826375
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,0,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,0,1,float16,float16,0,0.07467733323574066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,0,1,float16,fp8,0,0.07497066756089528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,0,1,fp8,fp8,0,0.08224000036716461
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,0,1,float16,float16,0,0.0458133320013682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,0,1,float16,fp8,0,0.04388799766699473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,0,1,fp8,fp8,0,0.048101335763931274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,0,1,float16,float16,0,0.044122666120529175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,0,1,float16,fp8,0,0.04385066529115041
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,0,1,fp8,fp8,0,0.04229333500067393
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,0,1,float16,float16,0,0.044981335600217186
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,0,1,float16,fp8,0,0.04379733403523763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,0,1,fp8,fp8,0,0.04483200112978617
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,0,1,float16,float16,0,0.03137599925200144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,0,1,float16,fp8,0,0.031328000128269196
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,0,1,fp8,fp8,0,0.03124266614516576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,0,1,float16,float16,0,0.029232000311215717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,0,1,float16,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,0,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,0,1,float16,float16,0,0.02943466603755951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,0,1,float16,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,0,1,fp8,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,0,1,float16,float16,0,0.02288000037272771
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,0,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,0,1,fp8,fp8,0,0.021669333179791767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,0,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,0,1,float16,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,0,1,fp8,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,0,1,fp8,fp8,0,0.07501866420110066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,0,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,0,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,0,1,float16,float16,0,0.01747200017174085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,0,1,float16,fp8,0,0.017770666629076004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,0,1,float16,float16,0,0.01775466650724411
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,0,1,float16,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,0,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,0,1,float16,float16,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,0,1,float16,float16,0,0.01624533285697301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,0,1,fp8,fp8,0,0.015669333438078564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,0,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,0,1,float16,float16,0,0.05202666421731313
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,0,1,float16,fp8,0,0.05207466582457224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,0,1,fp8,fp8,0,0.05030933519204458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,0,1,float16,float16,0,0.05205333232879639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,0,1,float16,fp8,0,0.05205333232879639
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,0,1,float16,float16,0,0.035589332381884255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,0,1,float16,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,0,1,fp8,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,0,1,float16,float16,0,0.033957332372665405
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,0,1,float16,fp8,0,0.035216001172860466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,0,1,fp8,fp8,0,0.03374933451414108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,0,1,float16,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,0,1,fp8,fp8,0,0.0339626669883728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,0,1,fp8,fp8,0,0.023546665906906128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,0,1,float16,float16,0,0.023669332265853882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,0,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,0,1,float16,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,0,1,fp8,fp8,0,0.024218666056791942
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,0,1,float16,float16,0,0.035418666899204254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,0,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,0,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,0,1,fp8,fp8,0,0.051311999559402466
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,0,1,float16,float16,0,0.019487999379634857
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,0,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,0,1,float16,float16,0,0.01607999950647354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,0,1,float16,float16,0,0.018565333137909572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,0,1,fp8,fp8,0,0.01565333331624667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,0,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,0,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,0,1,float16,float16,0,0.041850666205088295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,0,1,float16,fp8,0,0.04196799794832865
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,0,1,fp8,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,0,1,float16,float16,0,0.04414933423201243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,0,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,0,1,fp8,fp8,0,0.04382933179537455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,0,1,float16,float16,0,0.029648000995318096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,0,1,float16,fp8,0,0.030453334252039593
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,0,1,fp8,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,0,1,float16,float16,0,0.029520000020662945
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,0,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,0,1,float16,float16,0,0.027658666173617046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,0,1,float16,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,0,1,fp8,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,0,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,0,1,float16,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,0,1,float16,float16,0,0.02093333254257838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,0,1,float16,fp8,0,0.02091199904680252
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,0,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,0,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,0,1,float16,float16,0,0.014789332946141561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,0,1,float16,float16,0,0.016122666498025257
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,0,1,fp8,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,0,1,float16,float16,0,0.016058667252461117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,0,1,float16,float16,0,0.015669333438078564
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,0,1,float16,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,0,1,float16,float16,0,0.015418666104475657
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,0,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,0,1,float16,float16,0,0.03759466608365377
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,0,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,0,1,fp8,fp8,0,0.035749333600203194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,0,1,float16,float16,0,0.03793599953254064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,0,1,float16,fp8,0,0.03743999948104223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,0,1,fp8,fp8,0,0.037776000797748566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,0,1,float16,float16,0,0.025477332373460133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,0,1,float16,fp8,0,0.027029333015282948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,0,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,0,1,float16,float16,0,0.025807999074459076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,0,1,float16,fp8,0,0.02701866626739502
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,0,1,fp8,fp8,0,0.027813332776228588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,0,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,0,1,float16,fp8,0,0.027589333554108936
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,0,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,0,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,0,1,float16,float16,0,0.020143999407688778
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,0,1,float16,fp8,0,0.019738666713237762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,0,1,float16,float16,0,0.019610666980346043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,0,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,0,1,float16,fp8,0,0.01781333362062772
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,0,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,0,1,float16,fp8,0,0.017759999881188076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,0,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,0,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,0,1,float16,float16,0,0.015802666544914246
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,0,1,float16,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,0,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,0,1,fp8,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,0,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,0,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,0,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,0,1,float16,float16,0,0.01599466676513354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,0,1,float16,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,0,1,float16,float16,0,0.016741332908471424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,0,0.033802665770053864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,128,0,1,fp8,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,0,0.03373866776625315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,128,0,1,fp8,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,0,0.025653332471847534
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,128,0,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,0,0.024330665667851765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,0,0.03357866654793421
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,128,0,1,fp8,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,0,0.03369600077470144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,0,0.025583999852339428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,2,128,0,1,fp8,fp8,0,0.024154665569464367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,4,128,0,1,fp8,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,1,128,0,1,fp8,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,4,2,128,0,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,0,0.01863466699918111
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,4,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,128,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,2,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,4,128,0,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,4,2,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,4,128,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,1,128,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,4,2,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,4,128,0,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,2,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,0,0.016783999900023144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,4,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,1,128,0,1,fp8,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,0,0.01674666628241539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,4,2,128,0,1,fp8,fp8,0,0.015834666788578033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,4,128,0,1,fp8,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,1,128,0,1,fp8,fp8,0,0.016522667060295742
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,0,0.014746667196353277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,4,2,128,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,fp8,0,0.9133280118306478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,float16,0,0.5298399925231934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,4,1,128,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,fp8,0,0.5339306592941284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,128,0,1,fp8,fp8,0,0.4846133391062419
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,float16,0,0.5239893198013306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,fp8,0,0.5277599891026815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,128,0,1,fp8,fp8,0,0.47616533438364667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,128,0,1,fp8,fp8,0,0.8226666450500488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,fp8,0,0.3366239865620931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,128,0,1,fp8,fp8,0,0.30949334303538006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,float16,0,0.3335253397623698
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,fp8,0,0.33793067932128906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,float16,0,0.9382080237070719
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,float16,0,0.21889599164326987
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,fp8,0,0.22203733523686728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,128,0,1,fp8,fp8,0,0.2077173391977946
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,float16,0,0.3357280095418294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,fp8,0,0.2187839945157369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,128,0,1,fp8,fp8,0,0.20785067478815714
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,float16,0,0.5740053256352743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,fp8,0,0.5749013423919678
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,128,0,1,fp8,fp8,0,0.3041920065879822
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,128,0,1,fp8,fp8,0,0.5172213315963745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,fp8,0,0.34541865189870197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,128,0,1,fp8,fp8,0,0.3162879943847656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,float16,0,0.33577601114908856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,fp8,0,0.3402080138524373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,128,0,1,fp8,fp8,0,0.3070506652196248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,float16,0,0.2204373280207316
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,fp8,0,0.2196000019709269
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,128,0,1,fp8,fp8,0,0.20150399208068848
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,float16,0,0.21684799591700235
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,float16,0,0.35182400544484455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,fp8,0,0.21760533253351846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,128,0,1,fp8,fp8,0,0.19768534104029337
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,float16,0,0.168122669061025
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,float16,0,0.2210293412208557
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,128,0,1,fp8,fp8,0,0.16288533806800842
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,float16,0,0.16699200868606567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,fp8,0,0.16769067446390787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,128,0,1,fp8,fp8,0,0.16060800353686014
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,float16,0,0.44050665696461994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,fp8,0,0.4276053508122762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,128,0,1,fp8,fp8,0,0.38997332255045575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,fp8,0,0.2744160095850627
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,fp8,0,0.16877333323160806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,128,0,1,fp8,fp8,0,0.24662399291992188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,float16,0,0.2632053295771281
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,fp8,0,0.26793066660563153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,128,0,1,fp8,fp8,0,0.23876800139745077
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,float16,0,0.16461333632469177
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,fp8,0,0.1638879974683126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,float16,0,0.2691466609636943
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,float16,0,0.16037333011627197
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,fp8,0,0.16240533192952475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,float16,0,0.1423679987589518
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,fp8,0,0.14417066176732382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,128,0,1,fp8,fp8,0,0.1381600002447764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,float16,0,0.14307733376820883
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,128,0,1,fp8,fp8,0,0.15107733011245728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,128,0,1,fp8,fp8,0,0.14851199587186178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,float16,0,0.5515040159225464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,fp8,0,0.5439733266830444
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,128,0,1,fp8,fp8,0,0.4948693513870239
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,float16,0,0.3213706612586975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,fp8,0,0.3265119989713033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,fp8,0,0.14246400197347006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,128,0,1,fp8,fp8,0,0.2890773415565491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,128,0,1,fp8,fp8,0,0.13622933626174927
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,fp8,0,0.30662399530410767
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,128,0,1,fp8,fp8,0,0.2802613377571106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,fp8,0,0.19583467642466226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,128,0,1,fp8,fp8,0,0.18083200852076212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,float16,0,0.19087467590967813
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,fp8,0,0.1941759983698527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,128,0,1,fp8,fp8,0,0.17495999733606973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,float16,0,0.1239359974861145
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,fp8,0,0.12589333454767862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,128,0,1,fp8,fp8,0,0.11918399731318156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,float16,0,0.12408533692359924
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,fp8,0,0.12397332986195882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,128,0,1,fp8,fp8,0,0.1176639993985494
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,float16,0,0.11870933572451274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,fp8,0,0.11770133177439372
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,128,0,1,fp8,fp8,0,0.11391466856002808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,float16,0,0.11715733011563619
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,fp8,0,0.11855467160542806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,128,0,1,fp8,fp8,0,0.11136000355084737
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,float16,0,0.3514773448308309
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,fp8,0,0.35025068124135333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,128,0,1,fp8,fp8,0,0.31619733572006226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,float16,0,0.315013329188029
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,fp8,0,0.2132800022761027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,float16,0,0.200981338818868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,float16,0,0.2023306687672933
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,fp8,0,0.2030293345451355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,128,0,1,fp8,fp8,0,0.18416533867518106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,fp8,0,0.13054933150609335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,128,0,1,fp8,fp8,0,0.11962133646011353
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,float16,0,0.21218132972717285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,fp8,0,0.12893866499265036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,128,0,1,fp8,fp8,0,0.19368533293406168
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,128,0,1,fp8,fp8,0,0.11754133303960164
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,float16,0,0.09725866715113322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,fp8,0,0.09725866715113322
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,128,0,1,fp8,fp8,0,0.09302399555842082
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,float16,0,0.09724799791971843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,fp8,0,0.09714133540789287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,128,0,1,fp8,fp8,0,0.09282666444778442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,float16,0,0.09341333309809367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,fp8,0,0.09292800227801006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,128,0,1,fp8,fp8,0,0.08913066983222961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,float16,0,0.09278933207194011
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,fp8,0,0.09322133660316467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,128,0,1,fp8,fp8,0,0.08692266543706258
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,float16,0,0.3666293223698934
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,fp8,0,0.3578186829884847
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,float16,0,0.12581866979599
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,fp8,0,0.2114293376604716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,128,0,1,fp8,fp8,0,0.19158399105072021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,float16,0,0.1272479991118113
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,float16,0,0.20010666052500406
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,fp8,0,0.2011893391609192
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,128,0,1,fp8,fp8,0,0.18417600790659586
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,float16,0,0.12199466427167256
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,fp8,0,0.1223306655883789
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,128,0,1,fp8,fp8,0,0.11756267150243123
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,float16,0,0.11859732866287231
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,fp8,0,0.12035733461380005
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,128,0,1,fp8,fp8,0,0.10929600397745769
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,float16,0,0.07665599882602692
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,fp8,0,0.07701333363850911
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,128,0,1,fp8,fp8,0,0.07356800138950348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,float16,0,0.07514666517575581
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,fp8,0,0.0766186664501826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,128,0,1,fp8,fp8,0,0.07269333302974701
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,float16,0,0.07032000025113423
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,fp8,0,0.07073066631952922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,128,0,1,fp8,fp8,0,0.06843733290831248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,float16,0,0.0703359991312027
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,fp8,0,0.07055466870466869
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,128,0,1,fp8,fp8,0,0.32580800851186115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,128,0,1,fp8,fp8,0,0.06653333206971486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,float16,0,0.06832000116507213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,fp8,0,0.06648533542950948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,128,0,1,fp8,fp8,0,0.06443200012048085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,float16,0,0.06858133276303609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,fp8,0,0.06841599941253662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,128,0,1,fp8,fp8,0,0.06437333424886067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,float16,0,0.24165334304173788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,fp8,0,0.2389706571896871
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,128,0,1,fp8,fp8,0,0.21673067410786948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,fp8,0,0.14314666390419006
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,128,0,1,fp8,fp8,0,0.13337066769599915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,float16,0,0.1332319974899292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,fp8,0,0.13356799880663553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,128,0,1,fp8,fp8,0,0.1250986655553182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,float16,0,0.0844053328037262
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,float16,0,0.14061866203943887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,fp8,0,0.0858026643594106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,float16,0,0.08061866462230682
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,fp8,0,0.0830026666323344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,128,0,1,fp8,fp8,0,0.07686399916807811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,float16,0,0.06242666641871134
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,fp8,0,0.06247466802597046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,128,0,1,fp8,fp8,0,0.060506666700045265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,float16,0,0.06031466523806254
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,fp8,0,0.060720001657803856
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,128,0,1,fp8,fp8,0,0.05804799993832906
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,float16,0,0.056277334690093994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,fp8,0,0.05641599992911021
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,128,0,1,fp8,fp8,0,0.05439466734727224
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,float16,0,0.05700799822807312
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,fp8,0,0.05684266487757365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,128,0,1,fp8,fp8,0,0.054325332244237266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,float16,0,0.05619200070699056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,128,0,1,fp8,fp8,0,0.08161599934101105
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,128,0,1,fp8,fp8,0,0.05382933219273885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,float16,0,0.05410666763782501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,fp8,0,0.054133335749308266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,128,0,1,fp8,fp8,0,0.05231466889381409
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,float16,0,0.261845330397288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,fp8,0,0.26101332902908325
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,128,0,1,fp8,fp8,0,0.24047466119130453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,fp8,0,0.15370133519172668
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,128,0,1,fp8,fp8,0,0.14215466380119324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,float16,0,0.21022399266560873
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,float16,0,0.1441333293914795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,fp8,0,0.13953600327173868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,128,0,1,fp8,fp8,0,0.13238400220870972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,float16,0,0.08523733417193095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,fp8,0,0.08551466464996338
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,128,0,1,fp8,fp8,0,0.08268266419569652
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,float16,0,0.08226666847864787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,fp8,0,0.08232533435026805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,128,0,1,fp8,fp8,0,0.07750399907430013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,float16,0,0.05376533170541128
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,fp8,0,0.05448000133037567
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,128,0,1,fp8,fp8,0,0.05240533252557119
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,float16,0,0.05221333106358846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,fp8,0,0.052095999320348106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,128,0,1,fp8,fp8,0,0.050517335534095764
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,fp8,0,0.05579199890295664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,fp8,0,0.04822400212287903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,128,0,1,fp8,fp8,0,0.045791998505592346
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,float16,0,0.047968000173568726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,fp8,0,0.04595733185609182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,128,0,1,fp8,fp8,0,0.04577066500981649
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,float16,0,0.043552001317342125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,fp8,0,0.044624000787734985
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,128,0,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,float16,0,0.04420800010363261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,fp8,0,0.04368533194065094
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,float16,0,0.15203733245531717
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,float16,0,0.04193066557248434
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,fp8,0,0.04231466849644979
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,128,0,1,fp8,fp8,0,0.03955733279387156
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,float16,0,0.04185600082079569
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,fp8,0,0.0415786678592364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,128,0,1,fp8,fp8,0,0.03957333415746689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,float16,0,0.04807466765244802
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,float16,0,0.17351466417312622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,fp8,0,0.17686933279037476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,128,0,1,fp8,fp8,0,0.16593066851298013
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,fp8,0,0.10526933272679646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,128,0,1,fp8,fp8,0,0.10146133104960124
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,float16,0,0.09319999814033508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,fp8,0,0.09568533301353455
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,128,0,1,fp8,fp8,0,0.0934879978497823
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,float16,0,0.06084799766540527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,fp8,0,0.06431999802589417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,128,0,1,fp8,fp8,0,0.05941333373387655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,float16,0,0.060421332716941833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,fp8,0,0.062463998794555664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,128,0,1,fp8,fp8,0,0.05825066566467285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,float16,0,0.044266665975252785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,fp8,0,0.04432533184687296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,128,0,1,fp8,fp8,0,0.04362666606903076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,float16,0,0.04362666606903076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,fp8,0,0.044165333112080894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,128,0,1,fp8,fp8,0,0.04154666761557261
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,float16,0,0.037461332976818085
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,fp8,0,0.038058665891488395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,128,0,1,fp8,fp8,0,0.0414986660083135
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,128,0,1,fp8,fp8,0,0.03779733429352442
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,float16,0,0.037802666425704956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,fp8,0,0.040031999349594116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,128,0,1,fp8,fp8,0,0.037674665451049805
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,float16,0,0.035504000882307686
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,fp8,0,0.03558400024970373
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,128,0,1,fp8,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,float16,0,0.03637866675853729
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,fp8,0,0.03555733213822047
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,128,0,1,fp8,fp8,0,0.035690667728583016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,float16,0,0.03537066777547201
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,128,0,1,fp8,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,float16,0,0.035317334036032356
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,fp8,0,0.03569599986076355
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,128,0,1,fp8,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,float16,0,0.175327996412913
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,fp8,0,0.17534933487574259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,128,0,1,fp8,fp8,0,0.17479999860127768
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,fp8,0,0.10332799951235454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,float16,0,0.10276266932487488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,fp8,0,0.09506133198738098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,128,0,1,fp8,fp8,0,0.09883200128873189
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,float16,0,0.05874133110046387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,fp8,0,0.05783999959627787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,float16,0,0.10738666852315266
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,float16,0,0.058245331048965454
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,fp8,0,0.056661332647005715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,128,0,1,fp8,fp8,0,0.1043893297513326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,128,0,1,fp8,fp8,0,0.05440000196297964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,float16,0,0.03751999884843826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,float16,0,0.09654933214187622
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,128,0,1,fp8,fp8,0,0.03789333254098892
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,float16,0,0.03769599894682566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,128,0,1,fp8,fp8,0,0.035775999228159584
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,float16,0,0.03338133295377096
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,128,0,1,fp8,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,float16,0,0.0317546675602595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,128,0,1,fp8,fp8,0,0.059279998143514
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,128,0,1,fp8,fp8,0,0.031189332405726116
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,fp8,0,0.029818666477998097
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,128,0,1,fp8,fp8,0,0.029109333952267964
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,float16,0,0.029440000653266907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,fp8,0,0.02943466603755951
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,128,0,1,fp8,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,float16,0,0.02938133229811986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,128,0,1,fp8,fp8,0,0.027424000203609467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,float16,0,0.027434666951497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,128,0,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,float16,0,0.027850667635599773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,128,0,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,float16,0,0.027434666951497395
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,fp8,0,0.029178666571776073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,fp8,0,0.03133333226044973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,128,0,1,float16,float16,0,0.1452906628449758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,128,0,1,float16,fp8,0,0.14225600163141885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,128,0,1,float16,float16,0,0.09099200367927551
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,fp8,0,0.03792533278465271
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,128,0,1,float16,fp8,0,0.08733333150545756
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,128,0,1,fp8,fp8,0,0.0916319986184438
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,128,0,1,float16,float16,0,0.0788213312625885
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,128,0,1,float16,fp8,0,0.07843199868996938
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,128,0,1,fp8,fp8,0,0.08566400408744812
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,128,0,1,float16,float16,0,0.05020266771316528
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,128,0,1,fp8,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,128,0,1,fp8,fp8,0,0.054378668467203774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,128,0,1,float16,float16,0,0.04783466458320618
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,128,0,1,float16,fp8,0,0.04827733337879181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,128,0,1,fp8,fp8,0,0.046015997727712
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,128,0,1,float16,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,128,0,1,fp8,fp8,0,0.03179199993610382
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,128,0,1,float16,float16,0,0.03125333289305369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,128,0,1,float16,fp8,0,0.03194133440653483
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,128,0,1,fp8,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,128,0,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,128,0,1,float16,fp8,0,0.049695998430252075
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,128,0,1,float16,fp8,0,0.027066667874654133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,128,0,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,128,0,1,float16,float16,0,0.02569066733121872
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,128,0,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,128,0,1,fp8,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,128,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,128,0,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,128,0,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,128,0,1,float16,float16,0,0.023792001108328503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,128,0,1,float16,fp8,0,0.02386133372783661
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,128,0,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,128,0,1,float16,float16,0,0.02149333308140437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,128,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,128,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,128,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,128,0,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,128,0,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,128,0,1,float16,float16,0,0.02346133440732956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,128,0,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,128,0,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,128,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,128,0,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,128,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,128,0,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,128,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,128,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,128,0,1,float16,float16,0,0.03146133323510488
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,128,0,1,fp8,fp8,0,0.15432000160217285
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,128,0,1,float16,float16,0,0.07253866891066234
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,128,0,1,float16,fp8,0,0.0705866664648056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,128,0,1,fp8,fp8,0,0.08109866579373677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,128,0,1,float16,float16,0,0.044026667873064675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,128,0,1,float16,fp8,0,0.0440586656332016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,128,0,1,float16,float16,0,0.041738669077555336
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,128,0,1,float16,fp8,0,0.042261332273483276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,128,0,1,fp8,fp8,0,0.0415786678592364
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,128,0,1,float16,float16,0,0.028773332635561626
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,128,0,1,float16,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,128,0,1,fp8,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,128,0,1,float16,float16,0,0.027615999182065327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,128,0,1,float16,fp8,0,0.0277813325325648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,128,0,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,128,0,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,128,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,128,0,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,128,0,1,float16,float16,0,0.023269332945346832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,128,0,1,float16,fp8,0,0.02366400013367335
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,128,0,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,128,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,128,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,128,0,1,float16,float16,0,0.019530666371186573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,128,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,128,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,128,0,1,float16,float16,0,0.019823999454577763
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,128,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,128,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,128,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,128,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,128,0,1,fp8,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,128,0,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,128,0,1,float16,fp8,0,0.02045866722861926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,128,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,128,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,128,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,128,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,128,0,1,float16,float16,0,0.01942933350801468
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,128,0,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,128,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,128,0,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,128,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,128,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,128,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,128,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,128,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,128,0,1,float16,float16,0,0.018858666221300762
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,128,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,128,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,128,0,1,float16,float16,0,0.04427733520666758
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,128,0,1,float16,fp8,0,0.04438933233420054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,128,0,1,fp8,fp8,0,0.04390933116277059
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,128,0,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,128,0,1,fp8,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,128,0,1,float16,float16,0,0.029733332494894665
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,128,0,1,float16,fp8,0,0.029701332251230877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,128,0,1,fp8,fp8,0,0.01809599995613098
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,128,0,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,128,0,1,float16,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,128,0,1,float16,float16,0,0.02975466599067052
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,128,0,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,128,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,128,0,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,128,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,128,0,1,float16,float16,0,0.017551999539136887
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,128,0,1,float16,fp8,0,0.018624000251293182
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,128,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,128,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,128,0,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,128,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,128,0,1,float16,float16,0,0.016650666793187458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,128,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,128,0,1,fp8,fp8,0,0.01602666700879733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,128,0,1,fp8,fp8,0,0.029552000264326733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,128,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,128,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,128,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,128,0,1,float16,float16,0,0.01676799977819125
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,128,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,128,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,128,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,128,0,1,float16,float16,0,0.03504000107447306
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,128,0,1,float16,fp8,0,0.035391998787721
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,128,0,1,fp8,fp8,0,0.035631999373435974
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,128,0,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,128,0,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,128,0,1,float16,float16,0,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,128,0,1,float16,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,128,0,1,fp8,fp8,0,0.02363733450571696
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,128,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,128,0,1,float16,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,128,0,1,fp8,fp8,0,0.01982933282852173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,128,0,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,128,0,1,float16,fp8,0,0.019866666446129482
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,128,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,128,0,1,float16,float16,0,0.017871999492247898
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,128,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,128,0,1,fp8,fp8,0,0.047237331668535866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,128,0,1,float16,float16,0,0.01646399994691213
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,128,0,1,float16,fp8,0,0.017621333400408428
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,128,0,1,float16,float16,0,0.017738666385412216
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,128,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,128,0,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,128,0,1,float16,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,128,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,128,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,128,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,128,0,1,float16,float16,0,0.016730666160583496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,128,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,128,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,128,0,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,128,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,128,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,128,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,128,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,128,0,1,fp8,fp8,0,0.015834666788578033
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,128,0,1,float16,float16,0,0.02992533395687739
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,128,0,1,fp8,fp8,0,0.015770666301250458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,128,0,1,fp8,fp8,0,0.029882666965325672
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,128,0,1,float16,float16,0,0.021344001094500225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,128,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,128,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,128,0,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,128,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,128,0,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,128,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,128,0,1,fp8,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,128,0,1,float16,fp8,0,0.022197333474953968
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,128,0,1,float16,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,128,0,1,fp8,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,128,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,128,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,128,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,128,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,128,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,128,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,128,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,128,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,128,0,1,float16,float16,0,0.015562667200962702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,128,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,128,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,128,0,1,float16,fp8,0,0.01598400001724561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,128,0,1,fp8,fp8,0,0.015781333049138386
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,128,0,1,fp8,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,128,0,1,float16,float16,0,0.027221334477265675
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,128,0,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,128,0,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,128,0,1,float16,float16,0,0.020799999435742695
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,128,0,1,float16,fp8,0,0.021514666577180225
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,128,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,128,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,128,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,128,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,128,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,128,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,128,0,1,fp8,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,128,0,1,float16,float16,0,0.01587733378012975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,128,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,128,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,128,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,128,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,128,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,128,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,128,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,128,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,128,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,128,0,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,128,0,1,float16,float16,0,0.01657066618402799
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,128,0,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,128,0,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,128,0,1,float16,fp8,0,0.016074666132529575
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,128,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,128,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,128,0,1,float16,fp8,0,0.015919999529918034
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,128,0,1,float16,fp8,0,0.016271999726692837
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,128,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,0,0.0262719988822937
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,256,1,2,1,128,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,2,128,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,0,0.020047999918460846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,2,1,128,0,1,fp8,fp8,0,0.019754666835069656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,128,0,1,fp8,fp8,0,0.015829333414634068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,1,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,2,128,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,2,1,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,2,128,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,2,1,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,0,0.01543466622630755
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,2,128,0,1,fp8,fp8,0,0.01632533346613248
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,0,0.016250666230916977
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,2,1,128,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,2,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,2,1,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,2,128,0,1,fp8,fp8,0,0.01611199975013733
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,2,2,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,128,0,1,fp8,fp8,0,0.016186666985352833
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,0,0.015775999675194424
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,2,128,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,2,1,128,0,1,fp8,fp8,0,0.015967999895413715
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,fp8,0,0.33480532964070636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,128,0,1,fp8,fp8,0,0.30560000737508136
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,float16,0,0.21580799420674643
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,fp8,0,0.21744000911712646
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,128,0,1,fp8,fp8,0,0.20508267482121786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,float16,0,0.20753600200017294
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,fp8,0,0.20786666870117188
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,128,0,1,fp8,fp8,0,0.19760000705718994
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,float16,0,0.2174826661745707
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,fp8,0,0.21770666042963663
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,128,0,1,fp8,fp8,0,0.19737066825230917
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,float16,0,0.33581332365671795
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,128,0,1,fp8,fp8,0,0.1567200024922689
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,float16,0,0.16082666317621866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,128,0,1,fp8,fp8,0,0.1532639960447947
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,float16,0,0.1586026648680369
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,fp8,0,0.1594986617565155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,float16,0,0.16351466377576193
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,fp8,0,0.1648373305797577
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,float16,0,0.14029332995414734
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,fp8,0,0.1401653289794922
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,fp8,0,0.1606773336728414
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,float16,0,0.13591466347376505
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,fp8,0,0.13643200198809305
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,128,0,1,fp8,fp8,0,0.12963733077049255
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,float16,0,0.19933332999547324
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,fp8,0,0.19755733013153076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,128,0,1,fp8,fp8,0,0.18226132790247598
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,float16,0,0.12244799733161926
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,128,0,1,fp8,fp8,0,0.14888532956441244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,128,0,1,fp8,fp8,0,0.11733333269755046
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,float16,0,0.11572266618410747
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,fp8,0,0.11566932996114095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,128,0,1,fp8,fp8,0,0.10957866907119751
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,float16,0,0.11343999703725179
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,fp8,0,0.11349333326021831
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,128,0,1,fp8,fp8,0,0.10734400153160095
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,float16,0,0.12753599882125854
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,fp8,0,0.1297760009765625
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,128,0,1,fp8,fp8,0,0.11941333611806233
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,float16,0,0.09500267108281453
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,fp8,0,0.09711466232935588
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,128,0,1,fp8,fp8,0,0.09102400143941243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,float16,0,0.09175999959309895
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,fp8,0,0.09118400017420451
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,128,0,1,fp8,fp8,0,0.08716266353925069
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,float16,0,0.08879466851552327
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,128,0,1,fp8,fp8,0,0.13338133692741394
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,128,0,1,fp8,fp8,0,0.08467732866605122
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,float16,0,0.12410666545232137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,fp8,0,0.12078932921091716
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,128,0,1,fp8,fp8,0,0.1155413289864858
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,float16,0,0.07513066629568736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,fp8,0,0.07842133442560832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,128,0,1,fp8,fp8,0,0.07470400134722392
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,float16,0,0.06968000034491222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,fp8,0,0.07014399766921997
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,128,0,1,fp8,fp8,0,0.06674133241176605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,float16,0,0.06659733255704244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,fp8,0,0.06660800178845723
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,128,0,1,fp8,fp8,0,0.06431999802589417
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,float16,0,0.06389333307743073
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,128,0,1,fp8,fp8,0,0.06233066817124685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,float16,0,0.0825493335723877
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,fp8,0,0.08635733524958293
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,128,0,1,fp8,fp8,0,0.08072533210118611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,float16,0,0.06202666461467743
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,fp8,0,0.06225066880385081
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,128,0,1,fp8,fp8,0,0.060175999999046326
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,float16,0,0.05624533196290334
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,fp8,0,0.1240053375562032
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,128,0,1,fp8,fp8,0,0.054330666859944664
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,float16,0,0.05365333457787832
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,fp8,0,0.05331199864546458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,128,0,1,fp8,fp8,0,0.05217066903909048
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,float16,0,0.05216533442338308
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,fp8,0,0.05225066840648651
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,128,0,1,fp8,fp8,0,0.04965866605440775
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,float16,0,0.08354133367538452
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,fp8,0,0.08635200063387553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,fp8,0,0.05598933498064677
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,128,0,1,fp8,fp8,0,0.08294400076071422
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,float16,0,0.05322133501370748
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,fp8,0,0.05418133238951365
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,128,0,1,fp8,fp8,0,0.05187733471393585
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,float16,0,0.04624533156553904
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,fp8,0,0.048101335763931274
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,128,0,1,fp8,fp8,0,0.04610133171081543
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,float16,0,0.04362666606903076
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,fp8,0,0.04375466704368591
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,128,0,1,fp8,fp8,0,0.04159999887148539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,float16,0,0.04177600145339966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,fp8,0,0.039813332259655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,128,0,1,fp8,fp8,0,0.03952533255020777
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,float16,0,0.039594667653242745
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,128,0,1,fp8,fp8,0,0.03782933453718821
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,float16,0,0.06145066519578298
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,fp8,0,0.06403199831644694
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,128,0,1,fp8,fp8,0,0.060362666845321655
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,fp8,0,0.08988266189893086
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,fp8,0,0.045594667394955955
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,128,0,1,fp8,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,fp8,0,0.04066666712363561
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,128,0,1,fp8,fp8,0,0.038005332152048744
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,float16,0,0.03761066744724909
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,fp8,0,0.0360959991812706
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,128,0,1,fp8,fp8,0,0.03579200059175491
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,float16,0,0.0335359995563825
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,128,0,1,fp8,fp8,0,0.03367999941110611
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,float16,0,0.03346133232116699
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,128,0,1,fp8,fp8,0,0.03376533339420954
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,float16,0,0.05819733440876007
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,fp8,0,0.05940799911816915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,float16,0,0.04384533564249674
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,128,0,1,fp8,fp8,0,0.05994666616121928
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,float16,0,0.0377813329299291
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,fp8,0,0.037471999724706016
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,128,0,1,fp8,fp8,0,0.037231999138991036
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,float16,0,0.03928533444801966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,128,0,1,fp8,fp8,0,0.03201599915822347
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,float16,0,0.02938133229811986
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,fp8,0,0.030005333324273426
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,128,0,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,float16,0,0.029445332785447437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,128,0,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,float16,0,0.027717334528764088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,128,0,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,float16,0,0.027930667002995808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,fp8,0,0.027952000498771667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,128,0,1,fp8,fp8,0,0.02703999976317088
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,128,0,1,float16,float16,0,0.04977599779764811
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,128,0,1,float16,fp8,0,0.0498879998922348
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,128,0,1,fp8,fp8,0,0.0524533341328303
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,fp8,0,0.031712000568707786
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,128,0,1,float16,fp8,0,0.03123733401298523
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,128,0,1,fp8,fp8,0,0.031370667119820915
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,128,0,1,float16,float16,0,0.025487999121348064
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,128,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,128,0,1,fp8,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,128,0,1,float16,float16,0,0.02350933353106181
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,128,0,1,float16,fp8,0,0.024495999018351238
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,128,0,1,fp8,fp8,0,0.02298133323589961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,128,0,1,float16,float16,0,0.023103999594847362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,128,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,128,0,1,float16,float16,0,0.021562665700912476
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,128,0,1,float16,fp8,0,0.02218666672706604
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,128,0,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,128,0,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,128,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,128,0,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,float16,0,0.03173866619666418
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,128,0,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,128,0,1,fp8,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,128,0,1,float16,float16,0,0.029525332152843475
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,128,0,1,float16,fp8,0,0.029098667204380035
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,128,0,1,fp8,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,128,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,128,0,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,128,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,128,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,128,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,128,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,128,0,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,128,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,128,0,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,128,0,1,float16,float16,0,0.023418667415777843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,128,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,128,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,128,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,128,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,128,0,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,128,0,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,128,0,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,128,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,128,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,128,0,1,float16,float16,0,0.019962667177120846
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,128,0,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,128,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,128,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,128,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,128,0,1,float16,fp8,0,0.01865600049495697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,128,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,128,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,128,0,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,128,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,128,0,1,float16,fp8,0,0.017594666530688603
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,128,0,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,128,0,1,float16,float16,0,0.017375999440749485
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,128,0,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,128,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,128,0,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,128,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,128,0,1,float16,float16,0,0.018053332964579265
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,128,0,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,128,0,1,float16,float16,0,0.017525333911180496
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,128,0,1,fp8,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,128,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,128,0,1,float16,float16,0,0.015770666301250458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,128,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,128,0,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,128,0,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,128,0,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,128,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,128,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,128,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,128,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,128,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,128,0,1,float16,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,128,0,1,fp8,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,128,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,128,0,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,128,0,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,128,0,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,128,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,128,0,1,fp8,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,128,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,128,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,128,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,128,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,128,0,1,float16,float16,0,0.01616000011563301
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,128,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,128,0,1,float16,float16,0,0.01590399940808614
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,128,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,128,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,128,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,128,0,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,128,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,128,0,1,float16,float16,0,0.016693333784739178
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,0,0.018874666343132656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,128,1,1,1,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,1,1,1,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,32,1,1,1,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,16,1,1,1,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,4,1,1,1,128,0,1,fp8,fp8,0,0.015770666301250458
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,2,1,1,1,128,0,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,1,1,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,8,1,1,1,128,0,1,fp8,fp8,0,0.017701332767804463
TRTLLM,1.2.0rc6,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,128,0,1,float16,fp8,0,0.016282666474580765
