framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,fp8,0,7.636095682779948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,fp8,0,10.353637059529623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,fp8,0,6.856559753417969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,fp8,0,19.09557851155599
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,fp8,0,3.6720425287882485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,64,1,128,1,float16,float16,0,107.32589721679688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,64,64,128,1,float16,float16,0,110.73187255859375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,fp8,0,3.868933359781901
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,fp8,0,5.235941251118978
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,64,1,128,1,float16,float16,0,213.96907552083334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,64,8,128,1,float16,float16,0,218.45135498046875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,64,2,128,1,float16,float16,0,106.83634440104167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,64,4,128,1,float16,float16,0,217.2037556966146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,64,2,128,1,float16,float16,0,212.72163899739584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,fp8,0,9.737775802612305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,64,64,128,1,float16,float16,0,55.64600118001302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,64,1,128,1,float16,float16,0,52.99836222330729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,fp8,0,1.8361226717631023
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,fp8,0,2.72325865427653
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,64,2,128,1,float16,float16,0,51.39262898763021
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,fp8,0,2.040821393330892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,64,4,128,1,float16,float16,0,107.94673665364583
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,fp8,0,5.03494389851888
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,64,8,128,1,float16,float16,0,108.7432352701823
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,fp8,0,0.9165279865264893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,fp8,0,228.6157430013021
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,fp8,0,1.0118239720662434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,64,4,128,1,float16,float16,0,53.97607930501302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,64,1,128,1,float16,float16,0,27.18407948811849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,64,64,128,1,float16,float16,0,28.923856099446613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,64,8,128,1,float16,float16,0,50.68907674153646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,fp8,0,116.7638448079427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,fp8,0,2.541840076446533
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,64,2,128,1,float16,float16,0,25.669835408528645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,fp8,0,5.2837066650390625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,64,4,128,1,float16,float16,0,27.138959248860676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,fp8,0,5.623402913411458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,64,8,128,1,float16,float16,0,25.814420064290363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,fp8,0,7.573584238688151
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,fp8,0,1.2937493324279785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,fp8,0,57.0379384358724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,fp8,0,12.335461934407553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,fp8,0,2.770554542541504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,64,64,128,1,float16,float16,0,62.70451863606771
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,64,1,128,1,float16,float16,0,59.31394958496094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,fp8,0,3.0153172810872397
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,fp8,0,3.8365119298299155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,64,2,128,1,float16,float16,0,58.50812784830729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,fp8,0,6.520122528076172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,64,1,128,1,float16,float16,0,120.58920288085938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,64,2,128,1,float16,float16,0,125.1787109375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,64,4,128,1,float16,float16,0,124.53666178385417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,fp8,0,1.3757920265197754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,64,8,128,1,float16,float16,0,124.38512166341145
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,fp8,0,1.5046879450480144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,64,64,128,1,float16,float16,0,32.31959533691406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,fp8,0,1.8788053194681804
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,64,4,128,1,float16,float16,0,60.845052083333336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,fp8,0,3.155498822530111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,64,1,128,1,float16,float16,0,30.463152567545574
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,64,8,128,1,float16,float16,0,59.11693827311198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,fp8,0,135.3866984049479
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,64,2,128,1,float16,float16,0,29.743723551432293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,fp8,0,0.6908640066782633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,fp8,0,0.7449653148651123
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,64,4,128,1,float16,float16,0,29.477999369303387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,64,64,128,1,float16,float16,0,15.75040054321289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,fp8,0,0.981935977935791
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,fp8,0,1.6149600346883137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,64,1,128,1,float16,float16,0,16.240325927734375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,64,8,128,1,float16,float16,0,31.21636708577474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,64,2,128,1,float16,float16,0,14.72433090209961
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,fp8,0,4.274191856384277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,64,4,128,1,float16,float16,0,13.714944203694662
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,fp8,0,4.687066713968913
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,64,8,128,1,float16,float16,0,14.611221313476562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,fp8,0,64.3828633626302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,fp8,0,6.076576232910156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,fp8,0,34.68072001139323
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,fp8,0,9.437557220458984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,fp8,0,2.1340266863505044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,64,64,128,1,float16,float16,0,45.410624186197914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,64,1,128,1,float16,float16,0,43.16688028971354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,fp8,0,2.4765013058980307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,64,2,128,1,float16,float16,0,41.447408040364586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,fp8,0,3.0864054361979165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,fp8,0,5.211557388305664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,64,1,128,1,float16,float16,0,88.61839803059895
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,64,2,128,1,float16,float16,0,86.33221435546875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,64,4,128,1,float16,float16,0,85.7827860514323
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,fp8,0,1.1428266366322835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,64,8,128,1,float16,float16,0,86.94252522786458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,fp8,0,1.2520159880320232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,64,64,128,1,float16,float16,0,22.701711018880207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,fp8,0,1.5240586598714192
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,64,1,128,1,float16,float16,0,20.796192169189453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,64,4,128,1,float16,float16,0,43.96741231282552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,64,8,128,1,float16,float16,0,43.55595906575521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,fp8,0,2.682959874471029
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,64,2,128,1,float16,float16,0,20.523780822753906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,fp8,0,94.7379659016927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,fp8,0,0.568394660949707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,64,4,128,1,float16,float16,0,20.841040293375652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,64,64,128,1,float16,float16,0,11.366650899251303
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,fp8,0,0.6015626589457194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,64,1,128,1,float16,float16,0,10.171632130940756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,fp8,0,0.7887787024180094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,fp8,0,1.2453813552856445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,64,8,128,1,float16,float16,0,21.39214324951172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,64,2,128,1,float16,float16,0,10.509237289428711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,64,4,128,1,float16,float16,0,10.626906712849935
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,64,8,128,1,float16,float16,0,9.985589345296225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,fp8,0,22.204238891601562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,fp8,0,47.859100341796875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,fp8,0,7.179983774820964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,fp8,0,7.724154790242513
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,fp8,0,9.337066650390625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,fp8,0,13.713882446289062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,fp8,0,3.3596213658650718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,64,1,128,1,float16,float16,0,53.245707194010414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,fp8,0,3.682053248087565
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,64,64,128,1,float16,float16,0,60.83636983235677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,fp8,0,4.709413210550944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,64,2,128,1,float16,float16,0,53.589640299479164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,fp8,0,6.862106959025065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,64,1,128,1,float16,float16,0,114.4243876139323
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,64,2,128,1,float16,float16,0,117.62354532877605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,64,4,128,1,float16,float16,0,111.52733357747395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,64,8,128,1,float16,float16,0,114.24831136067708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,fp8,0,1.8136320114135742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,fp8,0,1.952474594116211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,64,4,128,1,float16,float16,0,55.10792541503906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,64,64,128,1,float16,float16,0,30.84955088297526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,fp8,0,2.252117315928141
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,64,1,128,1,float16,float16,0,27.673179626464844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,fp8,0,3.655461311340332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,64,2,128,1,float16,float16,0,29.06860860188802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,64,8,128,1,float16,float16,0,55.834086100260414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,64,4,128,1,float16,float16,0,26.81232452392578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,fp8,0,0.9213919639587402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,fp8,0,0.9739680290222168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,64,64,128,1,float16,float16,0,14.781621297200521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,fp8,0,121.57761637369792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,fp8,0,1.1589546998341878
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,fp8,0,1.7832105954488118
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,64,1,128,1,float16,float16,0,13.427258809407553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,64,8,128,1,float16,float16,0,27.214884440104168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,64,2,128,1,float16,float16,0,13.48244857788086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,fp8,0,0.4315679868062337
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,64,64,128,1,float16,float16,0,6.01091194152832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,64,4,128,1,float16,float16,0,13.975391387939453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,fp8,0,0.4696906805038452
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,64,1,128,1,float16,float16,0,5.88585090637207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,fp8,0,61.009012858072914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,fp8,0,0.5643253326416016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,64,8,128,1,float16,float16,0,13.596757253011068
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,64,2,128,1,float16,float16,0,5.156389236450195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,fp8,0,0.932266632715861
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,fp8,0,30.588053385416668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,64,4,128,1,float16,float16,0,6.109551747639974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,64,8,128,1,float16,float16,0,6.244911829630534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,fp8,0,5.151546796162923
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,fp8,0,15.200111389160156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,fp8,0,5.761706670125325
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,fp8,0,6.380442937215169
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,fp8,0,9.547072092692057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,fp8,0,2.5069546699523926
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,64,1,128,1,float16,float16,0,31.71961720784505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,64,64,128,1,float16,float16,0,36.41584014892578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,fp8,0,2.731727917989095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,fp8,0,3.2265758514404297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,64,2,128,1,float16,float16,0,30.75487009684245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,fp8,0,4.849290529886882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,64,1,128,1,float16,float16,0,65.41996765136719
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,64,2,128,1,float16,float16,0,66.67656962076823
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,64,4,128,1,float16,float16,0,65.81693013509114
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,fp8,0,1.384533405303955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,64,8,128,1,float16,float16,0,65.73890686035156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,fp8,0,1.4672692616780598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,64,64,128,1,float16,float16,0,17.04647445678711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,64,4,128,1,float16,float16,0,33.71485392252604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,fp8,0,1.759424050649007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,64,1,128,1,float16,float16,0,15.932907104492188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,fp8,0,69.91767883300781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,64,8,128,1,float16,float16,0,32.27635701497396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,fp8,0,2.5098560651143393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,fp8,0,0.6801813443501791
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,64,2,128,1,float16,float16,0,15.829636891682943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,fp8,0,0.7274239857991537
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,64,4,128,1,float16,float16,0,15.872453053792318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,64,64,128,1,float16,float16,0,8.344298680623373
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,64,1,128,1,float16,float16,0,6.855119705200195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,fp8,0,0.8620533148447672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,64,2,128,1,float16,float16,0,6.070170720418294
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,64,8,128,1,float16,float16,0,16.155685424804688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,64,4,128,1,float16,float16,0,6.946544011433919
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,fp8,0,35.07156880696615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,fp8,0,19.23966344197591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,fp8,0,1.1960960229237874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,fp8,0,0.3569440046946208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,fp8,0,0.2860746582349141
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,64,64,128,1,float16,float16,0,3.843637466430664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,fp8,0,0.4018719991048177
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,64,2,128,1,float16,float16,0,3.3317492802937827
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,64,1,128,1,float16,float16,0,3.1759732564290366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,64,8,128,1,float16,float16,0,6.586943944295247
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,64,4,128,1,float16,float16,0,3.064000129699707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,64,8,128,1,float16,float16,0,3.1345545450846353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,fp8,0,0.6160906553268433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,fp8,0,8.089317321777344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,fp8,0,7.115493138631185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,fp8,0,8.073530832926432
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,fp8,0,6.680666605631511
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,fp8,0,11.242165883382162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,fp8,0,3.5914719899495444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,64,64,128,1,float16,float16,0,33.65831502278646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,64,1,128,1,float16,float16,0,28.94415537516276
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,fp8,0,3.791680018107096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,64,2,128,1,float16,float16,0,59.231404622395836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,64,2,128,1,float16,float16,0,29.730875651041668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,fp8,0,4.344218571980794
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,64,4,128,1,float16,float16,0,60.2860361735026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,64,8,128,1,float16,float16,0,60.63737487792969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,64,1,128,1,float16,float16,0,60.87743631998698
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,fp8,0,5.608101526896159
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,fp8,0,1.7926665941874187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,fp8,0,1.7724372545878093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,64,64,128,1,float16,float16,0,16.677247365315754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,64,4,128,1,float16,float16,0,29.187909444173176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,64,1,128,1,float16,float16,0,15.831802368164062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,64,2,128,1,float16,float16,0,12.700922648111979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,fp8,0,2.1861066818237305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,fp8,0,65.37403869628906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,64,4,128,1,float16,float16,0,14.997231801350912
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,fp8,0,2.8135999043782554
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,64,8,128,1,float16,float16,0,31.006062825520832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,fp8,0,0.8481012980143229
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,fp8,0,0.9014826615651449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,64,64,128,1,float16,float16,0,7.3961334228515625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,fp8,0,1.0455786387125652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,64,2,128,1,float16,float16,0,5.648261388142903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,fp8,0,1.408506711324056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,64,1,128,1,float16,float16,0,5.659274419148763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,64,8,128,1,float16,float16,0,13.602778116861979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,64,4,128,1,float16,float16,0,6.854719797770183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,fp8,0,33.73196919759115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,fp8,0,0.4675360123316447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,64,64,128,1,float16,float16,0,3.7756640116373696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,fp8,0,0.45239468415578205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,64,8,128,1,float16,float16,0,6.5862242380778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,64,1,128,1,float16,float16,0,3.1186561584472656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,64,2,128,1,float16,float16,0,2.948431968688965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,fp8,0,0.7583680152893066
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,fp8,0,0.5407893260320028
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,fp8,0,0.0551093320051829
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,64,4,128,1,float16,float16,0,2.948080062866211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,64,8,128,1,float16,float16,0,2.7790772120157876
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,64,64,128,1,float16,float16,0,1.8291999499003093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,fp8,0,0.07285333176453908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,64,1,128,1,float16,float16,0,1.4151946703592937
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,fp8,0,0.13834666212399802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,fp8,0,7.5944264729817705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,fp8,0,16.46124267578125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,64,2,128,1,float16,float16,0,1.415328025817871
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,64,4,128,1,float16,float16,0,1.5045973459879558
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,fp8,0,0.34980801741282147
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,64,8,128,1,float16,float16,0,1.4237066904703777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,fp8,0,3.6342560450236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,fp8,0,5.66873041788737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,fp8,0,5.385887781778972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,fp8,0,6.364613215128581
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,fp8,0,8.27778689066569
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,fp8,0,2.506949265797933
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,64,64,128,1,float16,float16,0,19.341472625732422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,64,2,128,1,float16,float16,0,34.08437856038412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,64,1,128,1,float16,float16,0,17.56165313720703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,64,1,128,1,float16,float16,0,34.91891225179037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,64,4,128,1,float16,float16,0,35.24235280354818
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,fp8,0,2.826479911804199
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,64,8,128,1,float16,float16,0,35.803914388020836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,64,2,128,1,float16,float16,0,17.099573771158855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,fp8,0,38.23485819498698
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,fp8,0,1.349455992380778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,fp8,0,3.0142027537027993
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,fp8,0,4.117685317993164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,fp8,0,1.4103199640909831
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,64,1,128,1,float16,float16,0,7.9893442789713545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,fp8,0,1.4911680221557617
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,64,64,128,1,float16,float16,0,9.522848129272461
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,64,2,128,1,float16,float16,0,6.263317108154297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,64,4,128,1,float16,float16,0,15.913983662923178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,64,4,128,1,float16,float16,0,6.555376052856445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,fp8,0,1.9946613311767578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,64,8,128,1,float16,float16,0,17.325093587239582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,fp8,0,0.6339413324991862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,fp8,0,0.6620800097783407
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,64,64,128,1,float16,float16,0,4.603909174601237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,fp8,0,0.7443040211995443
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,64,1,128,1,float16,float16,0,3.2287893295288086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,64,2,128,1,float16,float16,0,3.199146588643392
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,64,8,128,1,float16,float16,0,7.587765375773112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,fp8,0,0.9766560395558676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,64,4,128,1,float16,float16,0,3.411264101664225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,fp8,0,0.3209013342857361
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,64,8,128,1,float16,float16,0,3.3465919494628906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,fp8,0,19.07414372762044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,64,1,128,1,float16,float16,0,1.6255680720011394
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,64,64,128,1,float16,float16,0,2.251311937967936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,fp8,0,0.3587840000788371
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,fp8,0,0.35239466031392414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,64,2,128,1,float16,float16,0,1.600645383199056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,fp8,0,9.140623728434244
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,fp8,0,0.45440534750620526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,fp8,0,0.03945599993069967
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,64,64,128,1,float16,float16,0,1.154901345570882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,64,8,128,1,float16,float16,0,1.7592159907023113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,64,4,128,1,float16,float16,0,1.7641760508219402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,64,1,128,1,float16,float16,0,0.8965600331624349
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,fp8,0,0.0459199994802475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,fp8,0,0.08789333701133728
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,fp8,0,4.362186749776204
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,64,8,128,1,float16,float16,0,0.8962720235188802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,fp8,0,0.14128533005714417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,64,2,128,1,float16,float16,0,0.8716586430867513
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,64,4,128,1,float16,float16,0,0.9027466773986816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,fp8,0,2.1821494102478027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,fp8,0,6.690042495727539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,fp8,0,7.061429341634114
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,fp8,0,7.8099517822265625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,fp8,0,10.174554824829102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,fp8,0,3.335599899291992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,64,1,128,1,float16,float16,0,16.314058939615887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,64,64,128,1,float16,float16,0,19.89095942179362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,64,1,128,1,float16,float16,0,33.97925313313802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,64,4,128,1,float16,float16,0,34.07826232910156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,fp8,0,3.520122528076172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,64,2,128,1,float16,float16,0,16.159530639648438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,64,8,128,1,float16,float16,0,34.45714569091797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,64,2,128,1,float16,float16,0,34.34275817871094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,fp8,0,3.9519786834716797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,fp8,0,4.902821222941081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,fp8,0,1.6724319458007812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,fp8,0,1.7515840530395508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,64,64,128,1,float16,float16,0,9.321968078613281
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,64,4,128,1,float16,float16,0,16.27188237508138
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,64,8,128,1,float16,float16,0,16.377221425374348
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,fp8,0,2.0792694091796875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,64,1,128,1,float16,float16,0,7.129994710286458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,64,2,128,1,float16,float16,0,6.875445048014323
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,64,4,128,1,float16,float16,0,5.964127858479817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,64,8,128,1,float16,float16,0,6.465338389078776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,fp8,0,38.07173411051432
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,fp8,0,19.497455596923828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,fp8,0,0.845957358678182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,fp8,0,2.46724271774292
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,fp8,0,0.9380426406860352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,64,4,128,1,float16,float16,0,3.243093490600586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,64,1,128,1,float16,float16,0,3.1280107498168945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,fp8,0,0.960261344909668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,64,2,128,1,float16,float16,0,3.0799732208251953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,64,64,128,1,float16,float16,0,4.761514663696289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,64,8,128,1,float16,float16,0,3.321722666422526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,fp8,0,0.41715200742085773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,fp8,0,0.4294666846593221
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,fp8,0,1.2210133075714111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,64,1,128,1,float16,float16,0,1.5914506912231445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,fp8,0,0.4702773491541545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,64,64,128,1,float16,float16,0,2.407599925994873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,fp8,0,0.5768213272094727
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,64,4,128,1,float16,float16,0,1.561482588450114
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,64,2,128,1,float16,float16,0,1.5590400695800781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,fp8,0,0.04442666471004486
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,64,1,128,1,float16,float16,0,0.820192019144694
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,64,8,128,1,float16,float16,0,1.6606292724609375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,64,64,128,1,float16,float16,0,1.1917760372161865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,fp8,0,0.06004266440868378
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,64,2,128,1,float16,float16,0,0.7887307008107504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,64,4,128,1,float16,float16,0,0.8306933244069418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,fp8,0,0.24824533859888712
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,fp8,0,4.170783996582031
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,64,8,128,1,float16,float16,0,0.8388533592224121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,fp8,0,8.936266581217447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,fp8,0,0.08589866757392883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,fp8,0,0.03230933348337809
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,fp8,0,1.05131196975708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,64,1,128,1,float16,float16,0,0.39902400970458984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,64,2,128,1,float16,float16,0,0.3976586659749349
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,64,64,128,1,float16,float16,0,0.5979040066401163
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,fp8,0,0.027610667049884796
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,fp8,0,2.0975680351257324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,64,4,128,1,float16,float16,0,0.3980960051218669
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,fp8,0,0.04620266457398733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,fp8,0,0.07573866844177246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,64,8,128,1,float16,float16,0,0.4057759841283162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,fp8,0,6.005434672037761
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,fp8,0,7.0104319254557295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,fp8,0,5.019621213277181
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,fp8,0,5.281706809997559
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,64,1,128,1,float16,float16,0,19.472965240478516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,64,8,128,1,float16,float16,0,19.788687388102215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,64,4,128,1,float16,float16,0,19.835290273030598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,fp8,0,2.5009387334187827
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,fp8,0,2.638495922088623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,64,1,128,1,float16,float16,0,8.52186648050944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,64,2,128,1,float16,float16,0,20.198095957438152
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,64,2,128,1,float16,float16,0,9.111957550048828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,fp8,0,2.9306186040242515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,64,64,128,1,float16,float16,0,12.048858642578125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,64,4,128,1,float16,float16,0,9.238506952921549
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,fp8,0,3.5415306091308594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,fp8,0,1.3551573753356934
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,64,8,128,1,float16,float16,0,9.394784291585287
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,64,1,128,1,float16,float16,0,3.651434580485026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,fp8,0,1.335770606994629
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,64,2,128,1,float16,float16,0,3.724010785420736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,64,64,128,1,float16,float16,0,6.067002614339192
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,fp8,0,1.590437412261963
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,64,4,128,1,float16,float16,0,4.181685447692871
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,fp8,0,22.504160563151043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,64,8,128,1,float16,float16,0,3.744762738545736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,fp8,0,1.7764959335327148
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,fp8,0,0.6348533233006796
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,64,64,128,1,float16,float16,0,3.038976033528646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,fp8,0,10.696122487386068
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,fp8,0,0.7148213386535645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,64,2,128,1,float16,float16,0,1.7771573066711426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,fp8,0,0.7086933453877767
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,64,4,128,1,float16,float16,0,1.8635892868041992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,64,1,128,1,float16,float16,0,1.824453353881836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,64,64,128,1,float16,float16,0,1.490250587463379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,64,8,128,1,float16,float16,0,1.8686347007751465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,fp8,0,0.9070080121358236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,fp8,0,0.2676266630490621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,64,4,128,1,float16,float16,0,0.9675892988840739
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,64,1,128,1,float16,float16,0,0.9385973612467448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,fp8,0,0.3572640021642049
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,fp8,0,0.3932373523712158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,fp8,0,0.33776533603668213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,fp8,0,5.3566023508707685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,64,2,128,1,float16,float16,0,0.9428106943766276
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,64,8,128,1,float16,float16,0,1.0067520141601562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,64,64,128,1,float16,float16,0,0.7563839753468832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,64,1,128,1,float16,float16,0,0.4850826660792033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,fp8,0,0.03758399933576584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,fp8,0,0.04269866645336151
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,64,4,128,1,float16,float16,0,0.5119893153508505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,64,2,128,1,float16,float16,0,0.4898293415705363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,fp8,0,2.6725972493489585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,fp8,0,0.06281599899133046
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,64,8,128,1,float16,float16,0,0.5134880145390829
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,64,1,128,1,float16,float16,0,0.2590879996617635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,64,64,128,1,float16,float16,0,0.3867306709289551
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,fp8,0,0.028490667541821797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,64,2,128,1,float16,float16,0,0.2443466583887736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,fp8,0,0.10461866855621338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,fp8,0,0.024703999360402424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,fp8,0,0.036090667049090065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,64,8,128,1,float16,float16,0,0.24513065814971924
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,64,4,128,1,float16,float16,0,0.2531893253326416
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,fp8,0,1.2982827027638753
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,fp8,0,0.06157866617043813
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,fp8,0,0.6547946532567342
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,fp8,0,7.041525522867839
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,fp8,0,7.1866455078125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,fp8,0,9.993605295817057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,fp8,0,8.032634735107422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,64,1,128,1,float16,float16,0,19.3986078898112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,64,2,128,1,float16,float16,0,19.409066518147785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,64,1,128,1,float16,float16,0,7.817477544148763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,64,8,128,1,float16,float16,0,20.518789927164715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,64,64,128,1,float16,float16,0,13.483856201171875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,fp8,0,3.5170774459838867
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,fp8,0,3.3400214513142905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,64,2,128,1,float16,float16,0,8.548458735148111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,64,4,128,1,float16,float16,0,8.08836809794108
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,fp8,0,4.233333269755046
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,64,4,128,1,float16,float16,0,20.121877034505207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,fp8,0,4.656847953796387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,64,8,128,1,float16,float16,0,7.950634638468425
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,64,64,128,1,float16,float16,0,6.686069488525391
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,fp8,0,1.7502133051554363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,fp8,0,1.6712320645650227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,fp8,0,22.845540364583332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,64,1,128,1,float16,float16,0,3.398687998453776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,fp8,0,2.0837546984354653
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,64,8,128,1,float16,float16,0,3.8322668075561523
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,64,4,128,1,float16,float16,0,3.8808161417643228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,fp8,0,2.3238186836242676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,fp8,0,0.8494719664255778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,64,64,128,1,float16,float16,0,3.4696105321248374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,64,2,128,1,float16,float16,0,3.4870665868123374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,fp8,0,11.399429321289062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,fp8,0,0.9352373282114664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,64,1,128,1,float16,float16,0,1.7132800420125325
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,64,4,128,1,float16,float16,0,1.779354731241862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,fp8,0,1.017573356628418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,fp8,0,1.2320640087127686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,64,2,128,1,float16,float16,0,1.755568027496338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,64,1,128,1,float16,float16,0,0.8880106608072916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,fp8,0,0.44916268189748126
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,64,64,128,1,float16,float16,0,1.6910026868184407
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,64,8,128,1,float16,float16,0,1.931114673614502
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,fp8,0,0.42896000544230145
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,64,4,128,1,float16,float16,0,0.9332213401794434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,64,2,128,1,float16,float16,0,0.9139573574066162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,fp8,0,5.714053471883138
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,fp8,0,0.5296479860941569
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,fp8,0,0.48872001965840656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,64,8,128,1,float16,float16,0,0.9359573523203532
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,64,64,128,1,float16,float16,0,0.8257333437601725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,fp8,0,0.04513066510359446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,64,1,128,1,float16,float16,0,0.47545599937438965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,fp8,0,0.08479467034339905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,fp8,0,2.884746551513672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,fp8,0,0.061903998255729675
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,64,4,128,1,float16,float16,0,0.4867146809895833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,64,2,128,1,float16,float16,0,0.4771786530812581
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,64,8,128,1,float16,float16,0,0.4918186664581299
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,fp8,0,0.2225280006726583
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,fp8,0,0.027744000156720478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,64,2,128,1,float16,float16,0,0.22477867205937704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,64,1,128,1,float16,float16,0,0.22721066077550253
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,fp8,0,0.02924799919128418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,64,64,128,1,float16,float16,0,0.42599467436472577
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,fp8,0,1.3893280029296875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,64,8,128,1,float16,float16,0,0.2315466602643331
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,64,64,128,1,float16,float16,0,0.15516799688339233
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,64,4,128,1,float16,float16,0,0.2339359919230143
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,fp8,0,0.1334826648235321
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,fp8,0,0.03940266619126002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,fp8,0,0.057946667075157166
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,fp8,0,0.13518933455149332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,fp8,0,0.6746773719787598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,64,2,128,1,float16,float16,0,0.12284266948699951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,64,1,128,1,float16,float16,0,0.12443733215332031
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,fp8,0,0.14164266983668009
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,64,8,128,1,float16,float16,0,0.12601600090662637
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,64,4,128,1,float16,float16,0,0.1276639997959137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,fp8,0,0.15520000457763672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,fp8,0,0.45632000764211017
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,fp8,0,7.18342399597168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,fp8,0,7.0542348225911455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,64,1,128,1,float16,float16,0,12.227072397867838
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,fp8,0,7.826943715413411
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,64,1,128,1,float16,float16,0,4.262912114461263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,fp8,0,9.35368537902832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,fp8,0,3.3424161275227866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,64,2,128,1,float16,float16,0,4.9606507619222
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,64,2,128,1,float16,float16,0,12.374015808105469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,64,4,128,1,float16,float16,0,12.99948247273763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,64,8,128,1,float16,float16,0,13.69253921508789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,fp8,0,3.513615926106771
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,64,64,128,1,float16,float16,0,12.803824106852213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,64,4,128,1,float16,float16,0,4.699061393737793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,fp8,0,16.396116892496746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,fp8,0,3.9415413538614907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,fp8,0,5.001040140787761
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,64,1,128,1,float16,float16,0,2.132160027821859
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,fp8,0,1.6684053738911946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,64,2,128,1,float16,float16,0,2.210458596547445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,64,8,128,1,float16,float16,0,5.35969607035319
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,fp8,0,1.7521866162618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,fp8,0,2.0794453620910645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,fp8,0,8.408490498860678
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,64,4,128,1,float16,float16,0,2.3263626098632812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,64,8,128,1,float16,float16,0,2.5885866483052573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,fp8,0,0.9142666657765707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,64,1,128,1,float16,float16,0,1.080181360244751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,fp8,0,2.4855945905049643
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,fp8,0,4.3422346115112305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,64,64,128,1,float16,float16,0,6.423450469970703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,fp8,0,0.882591962814331
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,64,4,128,1,float16,float16,0,1.1912000179290771
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,64,64,128,1,float16,float16,0,3.235856056213379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,64,2,128,1,float16,float16,0,1.1050933202107747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,fp8,0,1.0136853059132893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,64,64,128,1,float16,float16,0,1.746869405110677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,64,2,128,1,float16,float16,0,0.5676533381144205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,64,8,128,1,float16,float16,0,1.2999093532562256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,fp8,0,0.46271999677022296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,64,1,128,1,float16,float16,0,0.550597349802653
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,fp8,0,0.4511466821034749
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,fp8,0,1.2198399702707927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,fp8,0,2.048528035481771
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,fp8,0,0.4537760019302368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,64,4,128,1,float16,float16,0,0.6022080183029175
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,64,64,128,1,float16,float16,0,0.8734613259633383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,fp8,0,0.5709706544876099
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,64,8,128,1,float16,float16,0,0.6381440162658691
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,fp8,0,0.04832000037034353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,64,2,128,1,float16,float16,0,0.29685866832733154
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,fp8,0,0.059418668349583946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,64,4,128,1,float16,float16,0,0.3179840048154195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,64,8,128,1,float16,float16,0,0.3338346481323242
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,fp8,0,0.2225066622098287
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,fp8,0,0.0925546685854594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,64,64,128,1,float16,float16,0,0.39100801944732666
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,64,1,128,1,float16,float16,0,0.13699199755986533
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,fp8,0,1.0005653699239094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,64,1,128,1,float16,float16,0,0.29603199164072674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,fp8,0,0.02917333443959554
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,fp8,0,0.027653334041436512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,64,4,128,1,float16,float16,0,0.14130666851997375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,64,2,128,1,float16,float16,0,0.13872533043225607
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,fp8,0,0.036501333117485046
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,fp8,0,0.04619733492533366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,64,1,128,1,float16,float16,0,0.07903466622034709
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,fp8,0,0.1341546674569448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,64,2,128,1,float16,float16,0,0.07677866518497467
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,64,64,128,1,float16,float16,0,0.11103467146555583
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,fp8,0,0.3492639859517415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,fp8,0,0.46425068378448486
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,64,8,128,1,float16,float16,0,0.14693333705266318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,fp8,0,0.14643733700116476
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,fp8,0,0.13573333621025085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,fp8,0,0.13803199927012125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,64,4,128,1,float16,float16,0,0.07874133189519246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,64,8,128,1,float16,float16,0,0.08005333443482716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,64,64,128,1,float16,float16,0,0.051813334226608276
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,fp8,0,0.0753119985262553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,64,2,128,1,float16,float16,0,0.04223466912905375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,fp8,0,0.07578133543332417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,64,1,128,1,float16,float16,0,0.04200000067551931
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,64,4,128,1,float16,float16,0,0.046367997924486794
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,fp8,0,0.0780266672372818
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,fp8,0,0.08341866731643677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,fp8,0,0.16738667090733847
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,64,8,128,1,float16,float16,0,0.04679466784000397
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,fp8,0,3.3454507191975913
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,64,1,128,1,float16,float16,0,3.890869458516439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,fp8,0,3.5228427251180015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,fp8,0,3.9604854583740234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,64,4,128,1,float16,float16,0,4.526869455973308
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,64,2,128,1,float16,float16,0,4.427167892456055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,64,1,128,1,float16,float16,0,1.7073973019917805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,64,64,128,1,float16,float16,0,6.42624028523763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,fp8,0,1.682437260945638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,fp8,0,1.884335994720459
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,64,8,128,1,float16,float16,0,5.475301106770833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,64,2,128,1,float16,float16,0,1.772330602010091
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,64,4,128,1,float16,float16,0,1.9479093551635742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,fp8,0,5.036368052164714
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,fp8,0,6.984133402506511
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,fp8,0,2.097653388977051
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,64,8,128,1,float16,float16,0,2.277578671773275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,64,1,128,1,float16,float16,0,0.8313013712565104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,64,2,128,1,float16,float16,0,0.9485226472218832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,fp8,0,0.9318239688873291
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,fp8,0,0.9547359943389893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,fp8,0,3.4341065088907876
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,fp8,0,2.3428427378336587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,64,4,128,1,float16,float16,0,0.9551466306050619
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,64,64,128,1,float16,float16,0,3.223909378051758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,fp8,0,0.8466347058614095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,64,8,128,1,float16,float16,0,1.2150026957194011
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,fp8,0,0.4166613419850667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,fp8,0,0.42787198225657147
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,64,64,128,1,float16,float16,0,1.6224586168924968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,64,2,128,1,float16,float16,0,0.4681226809819539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,fp8,0,1.2325119972229004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,fp8,0,0.4573119878768921
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,fp8,0,1.7562665939331055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,64,8,128,1,float16,float16,0,0.5515520175298055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,64,1,128,1,float16,float16,0,0.20870399475097656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,fp8,0,0.5745866696039835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,64,4,128,1,float16,float16,0,0.4769279956817627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,fp8,0,0.04866133133570353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,64,2,128,1,float16,float16,0,0.21743466456731161
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,fp8,0,0.0526506652434667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,64,1,128,1,float16,float16,0,0.41755199432373047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,64,64,128,1,float16,float16,0,0.8035893440246582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,fp8,0,0.08361599842707317
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,64,4,128,1,float16,float16,0,0.24568533897399902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,64,8,128,1,float16,float16,0,0.2726186712582906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,64,64,128,1,float16,float16,0,0.38942933082580566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,fp8,0,0.027285332481066387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,fp8,0,0.18453333775202432
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,64,1,128,1,float16,float16,0,0.09616532921791077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,64,2,128,1,float16,float16,0,0.09734933574994405
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,fp8,0,0.029370665550231934
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,64,4,128,1,float16,float16,0,0.10148266951243083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,fp8,0,0.3479199806849162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,fp8,0,0.7834293047587076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,fp8,0,0.03651199986537298
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,64,1,128,1,float16,float16,0,0.051141331593195595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,fp8,0,0.13485866785049438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,64,8,128,1,float16,float16,0,0.10273599624633789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,fp8,0,0.1351626714070638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,64,64,128,1,float16,float16,0,0.07814933359622955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,64,2,128,1,float16,float16,0,0.05312533179918925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,64,4,128,1,float16,float16,0,0.05505066613356272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,fp8,0,0.042624001701672874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,fp8,0,0.1381119986375173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,64,8,128,1,float16,float16,0,0.056474665800730385
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,64,1,128,1,float16,float16,0,0.029792000850041706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,64,64,128,1,float16,float16,0,0.03688533355792364
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,fp8,0,0.14179199934005737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,64,4,128,1,float16,float16,0,0.031119999786218006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,64,8,128,1,float16,float16,0,0.031104000906149547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,fp8,0,0.07645333309968312
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,fp8,0,0.07447466750939687
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,64,2,128,1,float16,float16,0,0.030058667063713074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,fp8,0,0.3031839927037557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,fp8,0,0.0751039981842041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,fp8,0,0.12898666659990946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,fp8,0,0.07592533528804779
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,fp8,0,0.07016000151634216
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,fp8,0,0.04436799883842468
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,64,1,128,1,float16,float16,0,0.018976000448067982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,64,4,128,1,float16,float16,0,0.019226666539907455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,fp8,0,0.04489066700140635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,64,2,128,1,float16,float16,0,0.01985599969824155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,fp8,0,0.04427733520666758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,64,64,128,1,float16,float16,0,0.02347733328739802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,64,8,128,1,float16,float16,0,0.019541333119074505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,fp8,0,0.04477333525816599
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,64,1,128,1,float16,float16,0,1.8196372985839844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,fp8,0,1.677567958831787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,64,4,128,1,float16,float16,0,2.0783680280049643
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,fp8,0,1.7513119379679363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,64,2,128,1,float16,float16,0,2.0475306510925293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,64,8,128,1,float16,float16,0,2.4225172996520996
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,fp8,0,2.1056906382242837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,64,1,128,1,float16,float16,0,0.8862026532491049
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,fp8,0,0.8467573324839274
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,64,64,128,1,float16,float16,0,3.235408147176107
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,64,2,128,1,float16,float16,0,0.9757333596547445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,fp8,0,2.5753226280212402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,fp8,0,3.293013254801432
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,64,4,128,1,float16,float16,0,0.9980533123016357
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,fp8,0,0.8849919637044271
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,64,2,128,1,float16,float16,0,0.46145065625508624
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,fp8,0,0.9475893179575602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,64,1,128,1,float16,float16,0,0.4487413167953491
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,64,8,128,1,float16,float16,0,1.2811520099639893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,fp8,0,0.4495679934819539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,fp8,0,0.42846401532491046
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,64,4,128,1,float16,float16,0,0.4883893330891927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,64,64,128,1,float16,float16,0,1.640559991200765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,fp8,0,0.4569600025812785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,fp8,0,1.1662986278533936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,64,8,128,1,float16,float16,0,0.5816853443781534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,fp8,0,0.044138665000597634
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,64,64,128,1,float16,float16,0,0.8054773012797037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,fp8,0,0.5810986757278442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,64,4,128,1,float16,float16,0,0.22510399421056113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,64,1,128,1,float16,float16,0,0.21686933437983194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,fp8,0,0.05407999952634176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,fp8,0,0.7094399929046631
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,fp8,0,0.08602666854858398
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,64,8,128,1,float16,float16,0,0.2586666742960612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,64,1,128,1,float16,float16,0,0.0743999977906545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,64,64,128,1,float16,float16,0,0.3649066686630249
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,fp8,0,0.18876800934473673
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,fp8,0,0.02718399961789449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,64,2,128,1,float16,float16,0,0.20453333854675293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,64,2,128,1,float16,float16,0,0.07559466858704884
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,fp8,0,0.029520000020662945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,64,8,128,1,float16,float16,0,0.0804799993832906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,fp8,0,0.03629333277543386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,fp8,0,0.30473599831263226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,fp8,0,1.607850710550944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,64,64,128,1,float16,float16,0,0.060565332571665444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,fp8,0,0.020784000555674236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,64,1,128,1,float16,float16,0,0.04065600037574768
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,fp8,0,0.041696002086003624
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,64,4,128,1,float16,float16,0,0.08172266681989034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,64,2,128,1,float16,float16,0,0.04148799926042557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,fp8,0,0.021370666722456615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,64,4,128,1,float16,float16,0,0.044863998889923096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,64,64,128,1,float16,float16,0,0.03105599929889043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,fp8,0,0.02628266563018163
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,fp8,0,0.09841066598892212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,64,8,128,1,float16,float16,0,0.045647998650868736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,fp8,0,0.024512000381946564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,64,1,128,1,float16,float16,0,0.024405332903067272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,64,8,128,1,float16,float16,0,0.026101333399613697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,fp8,0,0.018592000007629395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,fp8,0,0.01720000058412552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,64,2,128,1,float16,float16,0,0.02499733368555705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,64,4,128,1,float16,float16,0,0.025727999707063038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,fp8,0,0.052111998200416565
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,64,64,128,1,float16,float16,0,0.018042666216691334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,64,1,128,1,float16,float16,0,0.015087999403476715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,64,2,128,1,float16,float16,0,0.015583999454975128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,fp8,0,0.01820266619324684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,fp8,0,0.014949332922697067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,fp8,0,0.015135999768972397
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,64,8,128,1,float16,float16,0,0.01524266724785169
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,64,4,128,1,float16,float16,0,0.014783999572197596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,64,64,128,1,float16,float16,0,0.011168000598748526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,64,1,128,1,float16,float16,0,0.010458666831254959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,fp8,0,0.013541333377361298
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,64,2,128,1,float16,float16,0,0.010746666540702185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,fp8,0,0.013877333452304205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,64,4,128,1,float16,float16,0,0.010346666599313417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,fp8,0,0.028607999285062153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,fp8,0,0.01360000049074491
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,64,8,128,1,float16,float16,0,0.010319999729593595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,fp8,0,0.020554666717847187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,fp8,0,0.013823999712864557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,64,1,128,1,float16,float16,0,0.8966933091481527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,64,2,128,1,float16,float16,0,0.9424053033192953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,64,4,128,1,float16,float16,0,0.9967733224232992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,64,64,128,1,float16,float16,0,1.6421653429667156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,64,8,128,1,float16,float16,0,1.2685173352559407
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,64,1,128,1,float16,float16,0,0.47810133298238117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,64,2,128,1,float16,float16,0,0.4733920097351074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,64,4,128,1,float16,float16,0,0.49982933203379315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,fp8,0,0.45497600237528485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,fp8,0,0.8855093320210775
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,fp8,0,1.6023786862691243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,fp8,0,0.8446400165557861
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,64,8,128,1,float16,float16,0,0.6024159987767538
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,fp8,0,1.1819360256195068
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,fp8,0,0.9617866675059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,fp8,0,0.4285706679026286
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,64,64,128,1,float16,float16,0,0.79530135790507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,fp8,0,0.5381866693496704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,64,1,128,1,float16,float16,0,0.2002293268839518
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,fp8,0,0.050757333636283875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,fp8,0,0.7458346684773763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,64,8,128,1,float16,float16,0,0.27406932910283405
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,fp8,0,0.07484800120194753
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,fp8,0,0.045791998505592346
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,fp8,0,0.2135466734568278
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,64,2,128,1,float16,float16,0,0.20729599396387735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,fp8,0,0.027327999472618103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,64,4,128,1,float16,float16,0,0.24210667610168457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,fp8,0,0.4469226598739624
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,64,1,128,1,float16,float16,0,0.06868266562620799
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,64,2,128,1,float16,float16,0,0.07077333331108093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,fp8,0,0.02897600084543228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,fp8,0,0.2826613386472066
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,64,4,128,1,float16,float16,0,0.07451733450094859
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,64,64,128,1,float16,float16,0,0.05871999760468801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,fp8,0,0.019744000087181728
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,fp8,0,0.04153066625197729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,fp8,0,0.0363520011305809
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,64,64,128,1,float16,float16,0,0.38591468334198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,fp8,0,0.020773333807786305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,64,2,128,1,float16,float16,0,0.03923733284076055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,64,8,128,1,float16,float16,0,0.07562666634718578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,fp8,0,0.07258666555086772
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,fp8,0,0.02500266581773758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,64,64,128,1,float16,float16,0,0.028175999720891316
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,64,4,128,1,float16,float16,0,0.040778666734695435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,fp8,0,0.023525332411130268
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,64,1,128,1,float16,float16,0,0.038373333712418876
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,fp8,0,0.016127999871969223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,64,2,128,1,float16,float16,0,0.022042666872342426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,fp8,0,0.016229332735141117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,64,4,128,1,float16,float16,0,0.022842665513356526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,fp8,0,0.038949333131313324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,64,1,128,1,float16,float16,0,0.022117334107557934
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,64,8,128,1,float16,float16,0,0.023056000471115112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,fp8,0,0.017664000391960144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,fp8,0,0.01803733284274737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,64,2,128,1,float16,float16,0,0.013359999905029932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,64,1,128,1,float16,float16,0,0.013455999394257864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,fp8,0,0.014256000518798828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,fp8,0,0.014362666755914688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,64,64,128,1,float16,float16,0,0.01640533283352852
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,64,4,128,1,float16,float16,0,0.013717333475748697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,64,8,128,1,float16,float16,0,0.04151466737190882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,64,8,128,1,float16,float16,0,0.013658666362365087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,fp8,0,0.014490666488806406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,64,1,128,1,float16,float16,0,0.00933333362142245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,fp8,0,0.014554666976133982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,64,64,128,1,float16,float16,0,0.010837333897749582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,64,2,128,1,float16,float16,0,0.009205333267649015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,fp8,0,0.013359999905029932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,64,4,128,1,float16,float16,0,0.009317333499590555
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,fp8,0,0.013568000247081121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,fp8,0,0.022250667214393616
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,fp8,0,0.013440000514189402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,64,8,128,1,float16,float16,0,0.009343999748428663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,fp8,0,0.013301332791646322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,64,64,128,1,float16,float16,0,0.009461333354314169
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,64,4,128,1,float16,float16,0,0.008816000074148178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,64,1,128,1,float16,float16,0,0.008645333349704742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,64,2,128,1,float16,float16,0,0.008645333349704742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,fp8,0,0.015930666277805965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,fp8,0,0.016842667013406754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,64,8,128,1,float16,float16,0,0.00884799969693025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,fp8,0,0.012634667257467905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,fp8,0,0.4335999886194865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,fp8,0,0.45444266001383465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,64,1,128,1,float16,float16,0,0.5323413213094076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,64,2,128,1,float16,float16,0,0.5512693325678507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,64,4,128,1,float16,float16,0,0.5669386784235636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,fp8,0,0.5150719881057739
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,64,64,128,1,float16,float16,0,0.7996799945831299
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,64,8,128,1,float16,float16,0,0.6365333398183187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,fp8,0,0.6110399961471558
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,fp8,0,0.8019413153330485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,64,1,128,1,float16,float16,0,0.26235200961430866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,64,2,128,1,float16,float16,0,0.26733332872390747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,fp8,0,0.06487466891606648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,64,4,128,1,float16,float16,0,0.2935093243916829
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,64,64,128,1,float16,float16,0,0.3665119806925456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,64,8,128,1,float16,float16,0,0.301530659198761
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,fp8,0,0.15074132879575095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,fp8,0,0.2367253303527832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,fp8,0,0.09086933732032776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,64,1,128,1,float16,float16,0,0.11918399731318156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,64,2,128,1,float16,float16,0,0.12088533242543538
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,64,4,128,1,float16,float16,0,0.12498133381207784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,fp8,0,0.033215999603271484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,64,8,128,1,float16,float16,0,0.12505066394805908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,fp8,0,0.3707893292109172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,64,64,128,1,float16,float16,0,0.10637332995732625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,fp8,0,0.042064001162846885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,fp8,0,0.056613331039746605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,fp8,0,0.03109866629044215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,64,1,128,1,float16,float16,0,0.06268266836802165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,64,4,128,1,float16,float16,0,0.0649599979321162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,fp8,0,0.027802666028340656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,fp8,0,0.024661332368850708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,fp8,0,0.12878400087356567
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,64,2,128,1,float16,float16,0,0.06386133531729381
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,fp8,0,0.031167998909950256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,64,8,128,1,float16,float16,0,0.0653546651204427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,64,64,128,1,float16,float16,0,0.042538667718569435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,64,2,128,1,float16,float16,0,0.03522666543722153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,fp8,0,0.06737599770228068
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,64,1,128,1,float16,float16,0,0.03499199946721395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,fp8,0,0.020773333807786305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,fp8,0,0.02364266663789749
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,fp8,0,0.021312000850836437
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,64,4,128,1,float16,float16,0,0.03939199944337209
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,64,8,128,1,float16,float16,0,0.03621866554021835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,64,64,128,1,float16,float16,0,0.02311466634273529
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,fp8,0,0.020453333854675293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,64,1,128,1,float16,float16,0,0.019968000551064808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,64,2,128,1,float16,float16,0,0.020015999674797058
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,fp8,0,0.02201066662867864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,fp8,0,0.018245333184798557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,fp8,0,0.01964266722400983
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,64,8,128,1,float16,float16,0,0.020202666521072388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,fp8,0,0.018826667219400406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,64,4,128,1,float16,float16,0,0.020954666038354237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,fp8,0,0.01820266619324684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,64,64,128,1,float16,float16,0,0.013679999858140945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,64,1,128,1,float16,float16,0,0.012383999923865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,fp8,0,0.01773333301146825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,fp8,0,0.017845333864291508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,64,2,128,1,float16,float16,0,0.012362666428089142
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,64,4,128,1,float16,float16,0,0.012383999923865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,64,8,128,1,float16,float16,0,0.012416000167528788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,fp8,0,0.037045332292715706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,fp8,0,0.017685333887736004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,fp8,0,0.021695998807748158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,64,1,128,1,float16,float16,0,0.0085333331177632
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,fp8,0,0.017445333302021027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,64,64,128,1,float16,float16,0,0.009322666873534521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,64,2,128,1,float16,float16,0,0.008602666358153025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,64,4,128,1,float16,float16,0,0.008442666381597519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,64,8,128,1,float16,float16,0,0.008613333106040955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,fp8,0,0.01748266691962878
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,fp8,0,0.01672533278663953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,fp8,0,0.017488000293572743
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,64,1,128,1,float16,float16,0,0.008245333408315977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,64,2,128,1,float16,float16,0,0.008192000289758047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,64,64,128,1,float16,float16,0,0.008885333314538002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,64,4,128,1,float16,float16,0,0.00814933329820633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,64,8,128,1,float16,float16,0,0.008282666405042013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,fp8,0,0.020047999918460846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,fp8,0,0.017263999829689663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,fp8,0,0.019541333119074505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,64,2,128,1,float16,float16,0,0.4764053424199422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,fp8,0,0.17543999354044595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,64,1,128,1,float16,float16,0,0.4716320037841797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,fp8,0,0.08357333143552144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,fp8,0,0.11814933021863301
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,64,4,128,1,float16,float16,0,0.4947199821472168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,64,8,128,1,float16,float16,0,0.504805326461792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,fp8,0,0.2856160004933675
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,64,64,128,1,float16,float16,0,0.42445866266886395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,64,1,128,1,float16,float16,0,0.2225333253542582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,64,4,128,1,float16,float16,0,0.2276853322982788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,fp8,0,0.031770666440327965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,64,2,128,1,float16,float16,0,0.22508267561594644
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,fp8,0,0.49931732813517254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,fp8,0,0.03543466577927271
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,fp8,0,0.05503466725349426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,64,64,128,1,float16,float16,0,0.14564266800880432
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,fp8,0,0.08187200129032135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,64,8,128,1,float16,float16,0,0.22829333941141763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,64,1,128,1,float16,float16,0,0.11340266466140747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,64,4,128,1,float16,float16,0,0.11606933673222859
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,fp8,0,0.024735999604066212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,64,2,128,1,float16,float16,0,0.11293333768844604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,64,8,128,1,float16,float16,0,0.11559999982515971
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,fp8,0,0.04297600189844767
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,64,64,128,1,float16,float16,0,0.06548266609509786
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,fp8,0,0.20077866315841675
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,fp8,0,0.02957333376010259
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,64,1,128,1,float16,float16,0,0.05948266883691152
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,fp8,0,0.0233599990606308
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,fp8,0,0.021776000658671062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,64,8,128,1,float16,float16,0,0.06037333110968272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,fp8,0,0.019845332950353622
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,64,4,128,1,float16,float16,0,0.06014933188756307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,fp8,0,0.10507733623186748
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,64,2,128,1,float16,float16,0,0.0594400018453598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,fp8,0,0.02383466561635335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,64,64,128,1,float16,float16,0,0.03751466671625773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,64,1,128,1,float16,float16,0,0.03333866596221924
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,64,2,128,1,float16,float16,0,0.0335413341720899
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,fp8,0,0.01989866668979327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,fp8,0,0.018976000448067982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,fp8,0,0.017903999735911686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,fp8,0,0.01802666609485944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,64,8,128,1,float16,float16,0,0.03362133353948593
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,64,1,128,1,float16,float16,0,0.01903466631968816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,fp8,0,0.018826667219400406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,fp8,0,0.01770666614174843
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,fp8,0,0.01775466650724411
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,64,64,128,1,float16,float16,0,0.020634666085243225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,fp8,0,0.05776533484458923
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,64,4,128,1,float16,float16,0,0.019152000546455383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,64,2,128,1,float16,float16,0,0.01899733394384384
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,64,4,128,1,float16,float16,0,0.03450666616360346
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,64,8,128,1,float16,float16,0,0.019013332823912304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,fp8,0,0.03341866781314214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,fp8,0,0.017317333569129307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,64,1,128,1,float16,float16,0,0.011898666620254517
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,64,2,128,1,float16,float16,0,0.011802667131026586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,fp8,0,0.017344000438849132
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,fp8,0,0.018016000588734944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,64,8,128,1,float16,float16,0,0.011744000017642975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,fp8,0,0.01738133281469345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,64,64,128,1,float16,float16,0,0.008789333204428354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,fp8,0,0.016672000288963318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,64,64,128,1,float16,float16,0,0.012351999680201212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,64,1,128,1,float16,float16,0,0.0081386665503184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,64,4,128,1,float16,float16,0,0.011887999872366587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,64,2,128,1,float16,float16,0,0.008053333188096682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,fp8,0,0.016869333883126576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,fp8,0,0.017263999829689663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,64,4,128,1,float16,float16,0,0.008229333286484083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,64,8,128,1,float16,float16,0,0.00814933329820633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,fp8,0,0.021087999145189922
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,64,64,128,1,float16,float16,0,0.008581333483258883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,64,1,128,1,float16,float16,0,0.007994666695594788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,64,4,128,1,float16,float16,0,0.008047999814152718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,64,2,128,1,float16,float16,0,0.008080000057816505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,64,8,128,1,float16,float16,0,0.008000000069538752
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,fp8,0,0.016575999557971954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,fp8,0,0.01988799994190534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,fp8,0,0.01930133377512296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,64,1,128,1,float16,fp8,0,0.020560000091791153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,64,2,128,1,float16,fp8,0,0.026693334182103474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,0,0.03850133220354716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,64,2,128,1,float16,float16,0,0.031136001149813335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,64,8,128,1,float16,float16,0,0.06946133573849995
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,0,0.057520002126693726
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,64,1,128,1,float16,float16,0,0.015781333049138386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,64,4,128,1,float16,float16,0,0.04498666524887085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,64,64,128,1,float16,float16,0,0.20107199748357138
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,64,2,128,1,float16,fp8,0,0.01699200024207433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,64,2,128,1,float16,float16,0,0.019834666202465694
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,64,1,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,64,4,128,1,float16,float16,0,0.026906666656335194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,0,0.02333866556485494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,64,1,128,1,float16,float16,0,0.010698666175206503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,fp8,0,0.17385067542394003
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,64,64,128,1,float16,float16,0,0.10287466645240784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,64,1,128,1,float16,fp8,0,0.013381333400805792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,0,0.034458667039871216
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,64,1,128,1,float16,float16,0,0.009701333319147428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,64,8,128,1,float16,float16,0,0.040607998768488564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,64,2,128,1,float16,float16,0,0.014442666123310724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,64,4,128,1,float16,float16,0,0.04279999931653341
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,fp8,0,0.09334400296211243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,64,2,128,1,float16,fp8,0,0.013584000368913015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,0,0.015626666446526844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,0,0.02160000056028366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,64,1,128,1,float16,float16,0,0.00956266683836778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,64,8,128,1,float16,float16,0,0.024901332954565685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,64,64,128,1,float16,float16,0,0.05529066423575083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,64,2,128,1,float16,float16,0,0.013514666507641474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,64,1,128,1,float16,fp8,0,0.012373333175977072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,64,2,128,1,float16,fp8,0,0.012421333541472753
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,0,0.014746667196353277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,0,0.013301332791646322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,fp8,0,0.053930665055910744
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,64,4,128,1,float16,float16,0,0.014202666779359182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,64,8,128,1,float16,float16,0,0.018058666338523228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,64,64,128,1,float16,float16,0,0.03256533294916153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,64,1,128,1,float16,fp8,0,0.011909333368142446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,64,1,128,1,float16,float16,0,0.009445333232482275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,64,2,128,1,float16,fp8,0,0.011920000116030375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,64,4,128,1,float16,float16,0,0.013573333621025085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,64,2,128,1,float16,float16,0,0.01350933313369751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,0,0.014021333307027817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,64,8,128,1,float16,float16,0,0.01402666668097178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,64,64,128,1,float16,float16,0,0.01836266616980235
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,64,1,128,1,float16,float16,0,0.010762666662534079
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,fp8,0,0.03459733227888743
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,64,1,128,1,float16,fp8,0,0.01184533288081487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,64,2,128,1,float16,float16,0,0.012810666114091873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,0,0.01157333329319954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,64,2,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,0,0.013408000270525614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,64,8,128,1,float16,float16,0,0.013418667018413544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,64,64,128,1,float16,float16,0,0.012191999703645706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,64,1,128,1,float16,float16,0,0.009018666421373686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,64,4,128,1,float16,float16,0,0.013301332791646322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,fp8,0,0.024106666445732117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,64,2,128,1,float16,float16,0,0.01249066616098086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,64,1,128,1,float16,fp8,0,0.011802667131026586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,64,4,128,1,float16,float16,0,0.012981332838535309
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,64,2,128,1,float16,fp8,0,0.01181866725285848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,0,0.011322667201360067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,64,1,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,fp8,0,0.017770666629076004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,fp8,0,0.019567999988794327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,64,8,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,64,2,128,1,float16,float16,0,0.0122079998254776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,64,1,128,1,float16,float16,0,0.008901333436369896
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,64,4,128,1,float16,float16,0,0.01211200033624967
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,64,2,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,64,8,128,1,float16,float16,0,0.012202666451533636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,64,64,128,1,float16,float16,0,0.008192000289758047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,0,0.011359999577204386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,64,64,128,1,float16,float16,0,0.01341333364446958
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,0,0.01184533288081487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,64,1,128,1,float16,float16,0,0.00867733359336853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,64,2,128,1,float16,float16,0,0.008687999720374743
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,64,1,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,64,2,128,1,float16,fp8,0,0.010816000401973724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,64,4,128,1,float16,float16,0,0.008629333227872849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,64,8,128,1,float16,float16,0,0.010496000448862711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,0,0.01116266722480456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,0,0.013466666142145792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,fp8,0,0.016805333395799
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,fp8,0,5.35533332824707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,fp8,0,6.354944229125977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,fp8,0,9.632421493530273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,fp8,0,23.696990966796875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,fp8,0,2.6951732635498047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,48,1,128,1,float16,float16,0,79.7053934733073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,48,48,128,1,float16,float16,0,84.37345377604167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,fp8,0,3.220149358113607
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,fp8,0,4.833658536275228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,48,2,128,1,float16,float16,0,81.05458068847656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,fp8,0,11.579007466634115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,48,1,128,1,float16,float16,0,161.84150187174478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,48,4,128,1,float16,float16,0,162.30186971028647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,48,2,128,1,float16,float16,0,165.40638224283853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,48,8,128,1,float16,float16,0,161.02564493815103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,fp8,0,1.3636587460835774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,fp8,0,1.6023680369059246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,48,48,128,1,float16,float16,0,41.453572591145836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,fp8,0,2.573637326558431
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,48,4,128,1,float16,float16,0,81.05337524414062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,fp8,0,5.727653503417969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,48,8,128,1,float16,float16,0,82.90319315592448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,fp8,0,177.9604288736979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,48,1,128,1,float16,float16,0,39.899940490722656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,fp8,0,0.6732959747314453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,48,48,128,1,float16,float16,0,21.06925328572591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,48,2,128,1,float16,float16,0,39.48451232910156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,fp8,0,0.871450662612915
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,48,4,128,1,float16,float16,0,40.07257080078125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,fp8,0,1.275882641474406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,48,1,128,1,float16,float16,0,21.420603434244793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,fp8,0,2.8252747853597007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,48,8,128,1,float16,float16,0,41.0620371500651
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,fp8,0,4.2325013478597
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,48,2,128,1,float16,float16,0,19.78860855102539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,48,4,128,1,float16,float16,0,19.323925018310547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,fp8,0,4.83784548441569
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,48,8,128,1,float16,float16,0,19.822410583496094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,fp8,0,42.720235188802086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,fp8,0,6.522362391153972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,fp8,0,14.880447387695312
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,fp8,0,86.47928873697917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,fp8,0,2.023519992828369
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,48,48,128,1,float16,float16,0,49.7510019938151
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,48,1,128,1,float16,float16,0,45.99244689941406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,fp8,0,2.310176054636637
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,48,2,128,1,float16,float16,0,46.810526529947914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,fp8,0,3.4700533548990884
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,48,1,128,1,float16,float16,0,95.43370564778645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,48,2,128,1,float16,float16,0,92.49169921875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,48,4,128,1,float16,float16,0,92.27980550130208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,fp8,0,6.855674743652344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,fp8,0,1.0673600037892659
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,48,8,128,1,float16,float16,0,93.33694458007812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,fp8,0,1.2060213088989258
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,48,48,128,1,float16,float16,0,24.770612080891926
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,fp8,0,99.16245524088542
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,fp8,0,1.7846132914225261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,48,1,128,1,float16,float16,0,22.68847401936849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,fp8,0,3.5298080444335938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,48,2,128,1,float16,float16,0,23.407513936360676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,48,4,128,1,float16,float16,0,47.62328084309896
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,fp8,0,0.5090453227361044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,48,8,128,1,float16,float16,0,46.53093465169271
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,fp8,0,0.599455992380778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,48,4,128,1,float16,float16,0,22.40904998779297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,fp8,0,0.9250400066375732
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,48,48,128,1,float16,float16,0,12.101460774739584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,48,1,128,1,float16,float16,0,10.741082509358725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,48,2,128,1,float16,float16,0,11.072043100992838
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,fp8,0,1.8448746999104817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,48,8,128,1,float16,float16,0,22.811798095703125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,fp8,0,3.3113600413004556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,48,4,128,1,float16,float16,0,11.287344614664713
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,fp8,0,49.2815195719401
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,48,8,128,1,float16,float16,0,11.160544077555338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,fp8,0,3.741578737894694
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,fp8,0,5.2688798904418945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,fp8,0,25.966346740722656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,fp8,0,10.178058624267578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,fp8,0,1.6579200426737468
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,48,48,128,1,float16,float16,0,33.36401621500651
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,fp8,0,1.8856852849324544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,48,1,128,1,float16,float16,0,32.73949940999349
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,fp8,0,2.6174400647481284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,48,2,128,1,float16,float16,0,31.99927012125651
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,fp8,0,5.244490623474121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,48,1,128,1,float16,float16,0,65.74758402506511
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,48,2,128,1,float16,float16,0,65.52213541666667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,48,4,128,1,float16,float16,0,65.75863647460938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,fp8,0,0.8443840344746908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,48,8,128,1,float16,float16,0,64.30428568522136
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,fp8,0,0.9562186400095621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,48,4,128,1,float16,float16,0,31.078160603841145
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,48,48,128,1,float16,float16,0,17.090267181396484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,fp8,0,1.407983938852946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,fp8,0,2.5391039848327637
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,48,1,128,1,float16,float16,0,16.64208475748698
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,fp8,0,69.43733215332031
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,48,8,128,1,float16,float16,0,32.982800801595054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,fp8,0,0.4176853497823079
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,48,2,128,1,float16,float16,0,17.07198969523112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,fp8,0,0.5076586802800497
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,48,4,128,1,float16,float16,0,15.617557525634766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,fp8,0,0.743722677230835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,48,48,128,1,float16,float16,0,8.360533396402994
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,48,1,128,1,float16,float16,0,8.003717422485352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,48,2,128,1,float16,float16,0,7.571168263753255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,fp8,0,1.4079786936442058
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,48,8,128,1,float16,float16,0,16.796656290690105
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,48,4,128,1,float16,float16,0,7.5473283131917315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,48,8,128,1,float16,float16,0,7.413861592610677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,fp8,0,5.253210703531901
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,fp8,0,16.987599690755207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,fp8,0,6.137989044189453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,fp8,0,35.100362141927086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,fp8,0,7.660320281982422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,fp8,0,14.294228871663412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,fp8,0,2.617237408955892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,48,1,128,1,float16,float16,0,40.84658559163412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,48,48,128,1,float16,float16,0,45.083892822265625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,fp8,0,2.945978800455729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,fp8,0,3.9028587341308594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,48,2,128,1,float16,float16,0,39.97356160481771
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,fp8,0,7.311360041300456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,48,1,128,1,float16,float16,0,85.66665649414062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,48,2,128,1,float16,float16,0,86.91222127278645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,48,4,128,1,float16,float16,0,90.67453002929688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,fp8,0,1.332634687423706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,48,8,128,1,float16,float16,0,87.97609456380208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,fp8,0,1.5334240595499675
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,48,48,128,1,float16,float16,0,22.792564392089844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,48,4,128,1,float16,float16,0,45.42713928222656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,fp8,0,1.9964799880981445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,fp8,0,92.11537679036458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,48,1,128,1,float16,float16,0,20.6768798828125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,48,8,128,1,float16,float16,0,43.5475819905599
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,fp8,0,3.580922762552897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,fp8,0,0.6582080125808716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,48,2,128,1,float16,float16,0,20.611632029215496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,48,48,128,1,float16,float16,0,10.504496256510416
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,fp8,0,0.7692480087280273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,48,1,128,1,float16,float16,0,9.987216313680014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,fp8,0,0.9992480278015137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,48,4,128,1,float16,float16,0,20.48845926920573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,48,2,128,1,float16,float16,0,10.162698745727539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,fp8,0,1.9621013005574544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,48,8,128,1,float16,float16,0,21.353248596191406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,48,48,128,1,float16,float16,0,5.129839897155762
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,fp8,0,0.3245866696039836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,48,1,128,1,float16,float16,0,4.63593069712321
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,48,4,128,1,float16,float16,0,10.221189498901367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,48,8,128,1,float16,float16,0,10.1659787495931
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,fp8,0,0.393829345703125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,fp8,0,44.72386169433594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,fp8,0,0.5263146559397379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,fp8,0,21.86101786295573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,48,2,128,1,float16,float16,0,4.444911956787109
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,fp8,0,1.0388960043589275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,48,8,128,1,float16,float16,0,4.449343999226888
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,48,4,128,1,float16,float16,0,4.8436587651570635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,fp8,0,10.684735616048178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,fp8,0,3.8674933115641275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,fp8,0,4.287509282430013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,fp8,0,5.455600102742513
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,fp8,0,9.06662368774414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,fp8,0,1.9745333989461262
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,48,1,128,1,float16,float16,0,23.7096430460612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,fp8,0,2.209280014038086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,48,48,128,1,float16,float16,0,27.488245646158855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,48,2,128,1,float16,float16,0,23.27003224690755
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,fp8,0,2.907957394917806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,48,1,128,1,float16,float16,0,48.525349934895836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,48,2,128,1,float16,float16,0,48.41026306152344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,fp8,0,4.968394597371419
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,48,4,128,1,float16,float16,0,50.8797861735026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,48,8,128,1,float16,float16,0,51.702972412109375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,fp8,0,0.9873493512471517
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,fp8,0,1.0863093535105388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,48,4,128,1,float16,float16,0,24.23248545328776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,fp8,0,52.24986267089844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,48,48,128,1,float16,float16,0,13.33074696858724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,48,1,128,1,float16,float16,0,11.797845204671225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,fp8,0,1.454437255859375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,fp8,0,2.4584906895955405
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,48,8,128,1,float16,float16,0,24.64465586344401
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,48,2,128,1,float16,float16,0,12.855093638102213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,fp8,0,0.4955413341522217
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,48,4,128,1,float16,float16,0,12.067882537841797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,fp8,0,0.5475946664810181
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,48,48,128,1,float16,float16,0,6.201759974161784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,fp8,0,0.6948853333791097
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,48,1,128,1,float16,float16,0,4.458677291870117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,fp8,0,1.166266679763794
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,48,8,128,1,float16,float16,0,12.056096394856771
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,48,2,128,1,float16,float16,0,5.627610524495442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,48,4,128,1,float16,float16,0,5.321690559387207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,fp8,0,0.09593066573143005
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,fp8,0,0.15356266498565674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,48,48,128,1,float16,float16,0,2.964357376098633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,48,8,128,1,float16,float16,0,4.991152127583821
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,48,1,128,1,float16,float16,0,2.583893299102783
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,fp8,0,12.281210581461588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,fp8,0,27.428324381510418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,48,2,128,1,float16,float16,0,2.637808005015055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,fp8,0,0.3569066524505615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,fp8,0,0.6244106690088908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,48,4,128,1,float16,float16,0,2.305626710255941
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,48,8,128,1,float16,float16,0,2.741530736287435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,fp8,0,5.622048060099284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,fp8,0,5.106346766153972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,fp8,0,5.92957878112793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,fp8,0,6.77674674987793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,fp8,0,10.28500239054362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,fp8,0,2.555445353190104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,48,1,128,1,float16,float16,0,22.8691889444987
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,48,48,128,1,float16,float16,0,25.119723002115887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,fp8,0,2.826218605041504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,48,2,128,1,float16,float16,0,21.3480224609375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,fp8,0,3.425509452819824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,48,2,128,1,float16,float16,0,45.73017374674479
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,48,1,128,1,float16,float16,0,47.20709228515625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,48,4,128,1,float16,float16,0,46.85303751627604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,fp8,0,5.347391764322917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,48,8,128,1,float16,float16,0,48.59467569986979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,fp8,0,1.3712159792582195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,fp8,0,1.4699519475301106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,48,4,128,1,float16,float16,0,22.607999165852863
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,48,1,128,1,float16,float16,0,10.994346618652344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,48,48,128,1,float16,float16,0,13.518671671549479
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,fp8,0,49.40938822428385
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,fp8,0,1.8280213673909504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,48,2,128,1,float16,float16,0,11.608197530110678
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,48,8,128,1,float16,float16,0,22.109087626139324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,fp8,0,3.048645337422689
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,48,4,128,1,float16,float16,0,9.701786677042643
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,fp8,0,0.6461013158162435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,fp8,0,0.7225120067596436
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,48,1,128,1,float16,float16,0,5.121450742085774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,48,48,128,1,float16,float16,0,5.772053400675456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,fp8,0,0.8585653305053711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,48,2,128,1,float16,float16,0,4.455567995707194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,48,4,128,1,float16,float16,0,4.710970560709636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,48,8,128,1,float16,float16,0,11.194928487141928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,fp8,0,1.4519359270731609
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,48,48,128,1,float16,float16,0,2.778911908467611
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,fp8,0,0.32974400122960407
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,48,1,128,1,float16,float16,0,2.431877295176188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,48,8,128,1,float16,float16,0,5.385013580322266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,fp8,0,0.3566773335138957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,fp8,0,12.222671508789062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,fp8,0,0.44884800910949707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,fp8,0,25.051406860351562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,48,4,128,1,float16,float16,0,2.3810507456461587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,48,2,128,1,float16,float16,0,2.102479934692383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,48,48,128,1,float16,float16,0,1.3434185981750488
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,fp8,0,0.6678720315297445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,fp8,0,0.04937600096066793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,fp8,0,5.833503723144531
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,48,1,128,1,float16,float16,0,1.1616426308949788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,48,8,128,1,float16,float16,0,2.2781707445780435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,fp8,0,0.06703466673692067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,48,2,128,1,float16,float16,0,1.1182080109914143
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,fp8,0,0.14114666978518167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,fp8,0,0.2760480046272278
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,48,8,128,1,float16,float16,0,1.0907413164774578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,48,4,128,1,float16,float16,0,1.1043306986490886
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,fp8,0,2.5302720069885254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,fp8,0,4.411701202392578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,fp8,0,4.105242729187012
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,fp8,0,5.184901237487793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,fp8,0,7.55681037902832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,fp8,0,1.9279467264811199
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,48,1,128,1,float16,float16,0,12.65221913655599
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,fp8,0,2.12554661432902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,48,2,128,1,float16,float16,0,26.983434041341145
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,48,4,128,1,float16,float16,0,26.711013793945312
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,48,1,128,1,float16,float16,0,26.37486521402995
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,48,8,128,1,float16,float16,0,27.42333221435547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,48,48,128,1,float16,float16,0,14.668336232503256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,fp8,0,2.48087469736735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,48,2,128,1,float16,float16,0,11.551221211751303
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,fp8,0,3.6322240829467773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,fp8,0,0.9734026590983073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,48,1,128,1,float16,float16,0,5.06333859761556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,fp8,0,1.0396587053934734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,48,48,128,1,float16,float16,0,7.113861083984375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,48,2,128,1,float16,float16,0,4.869253476460774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,48,4,128,1,float16,float16,0,12.709264119466146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,fp8,0,1.241055965423584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,48,8,128,1,float16,float16,0,12.987055460611979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,fp8,0,1.9666345914204915
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,fp8,0,29.40253448486328
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,fp8,0,0.4789546728134155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,48,4,128,1,float16,float16,0,5.997925440470378
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,48,1,128,1,float16,float16,0,2.6514293352762857
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,48,48,128,1,float16,float16,0,3.3886292775472007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,fp8,0,0.51910400390625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,48,8,128,1,float16,float16,0,5.6492156982421875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,fp8,0,0.6120800177256266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,fp8,0,0.8861227035522461
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,48,2,128,1,float16,float16,0,2.7768001556396484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,48,4,128,1,float16,float16,0,2.9318507512410483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,48,8,128,1,float16,float16,0,2.5117600758870444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,fp8,0,6.65943972269694
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,fp8,0,14.197386423746744
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,48,1,128,1,float16,float16,0,1.2380212942759197
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,fp8,0,0.10844799876213074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,48,48,128,1,float16,float16,0,1.8070613543192546
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,fp8,0,0.15521066387494406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,48,2,128,1,float16,float16,0,1.3690080642700195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,fp8,0,0.4477440118789673
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,48,4,128,1,float16,float16,0,1.405178705851237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,48,8,128,1,float16,float16,0,1.2788426876068115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,48,1,128,1,float16,float16,0,0.659882664680481
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,48,48,128,1,float16,float16,0,0.8638559977213541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,fp8,0,3.1063413619995117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,fp8,0,0.0373279998699824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,fp8,0,0.04472533365090688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,fp8,0,0.31033599376678467
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,fp8,0,0.17459734280904135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,fp8,0,0.08705066641171773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,48,4,128,1,float16,float16,0,0.637498656908671
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,48,8,128,1,float16,float16,0,0.7406240304311117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,48,2,128,1,float16,float16,0,0.6421759923299154
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,fp8,0,1.5345066388448079
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,fp8,0,6.279370625813802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,fp8,0,8.63100814819336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,fp8,0,5.478352228800456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,fp8,0,5.868522644042969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,fp8,0,2.738719940185547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,48,1,128,1,float16,float16,0,8.794954935709635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,48,48,128,1,float16,float16,0,14.604426066080729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,48,1,128,1,float16,float16,0,25.174395243326824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,48,4,128,1,float16,float16,0,26.136517842610676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,fp8,0,2.811258633931478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,fp8,0,3.2055466969807944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,48,2,128,1,float16,float16,0,25.838399251302082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,48,8,128,1,float16,float16,0,26.620806376139324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,48,2,128,1,float16,float16,0,11.509498596191406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,fp8,0,4.357770601908366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,fp8,0,1.2990612983703613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,48,4,128,1,float16,float16,0,12.193861643473307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,48,48,128,1,float16,float16,0,6.9987945556640625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,fp8,0,1.3621385892232258
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,48,8,128,1,float16,float16,0,12.615712483723959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,48,1,128,1,float16,float16,0,4.426752090454102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,fp8,0,1.5876587231953938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,fp8,0,28.91551971435547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,48,2,128,1,float16,float16,0,4.626250584920247
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,fp8,0,2.2819412549336753
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,48,4,128,1,float16,float16,0,5.304138819376628
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,48,8,128,1,float16,float16,0,5.660762786865234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,48,48,128,1,float16,float16,0,3.564938545227051
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,fp8,0,0.687882661819458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,fp8,0,0.6867520014444987
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,48,1,128,1,float16,float16,0,2.5420640309651694
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,48,2,128,1,float16,float16,0,2.281439940134684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,fp8,0,0.807919979095459
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,fp8,0,1.0748533407847087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,48,8,128,1,float16,float16,0,2.4130187034606934
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,48,4,128,1,float16,float16,0,2.3881173133850098
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,fp8,0,6.919338862101237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,fp8,0,13.976880391438803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,fp8,0,0.2964906692504883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,fp8,0,0.3446720043818156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,48,4,128,1,float16,float16,0,1.238368034362793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,fp8,0,0.3743520180384318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,48,2,128,1,float16,float16,0,1.1998079617818196
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,48,48,128,1,float16,float16,0,1.8240159352620442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,48,1,128,1,float16,float16,0,1.229477326075236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,fp8,0,0.511184016863505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,48,48,128,1,float16,float16,0,0.8945919672648112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,48,8,128,1,float16,float16,0,1.3013226985931396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,fp8,0,3.321626663208008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,fp8,0,0.04854933420817057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,fp8,0,0.04062400013208389
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,48,4,128,1,float16,float16,0,0.649344007174174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,48,1,128,1,float16,float16,0,0.6171733140945435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,fp8,0,0.08100266754627228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,48,2,128,1,float16,float16,0,0.6238720019658407
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,fp8,0,1.604629357655843
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,fp8,0,0.16905067364374796
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,48,8,128,1,float16,float16,0,0.6618133385976156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,fp8,0,0.0277813325325648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,48,2,128,1,float16,float16,0,0.3103040059407552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,48,48,128,1,float16,float16,0,0.4462133248647054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,48,1,128,1,float16,float16,0,0.307258665561676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,fp8,0,0.04902400076389313
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,fp8,0,0.032261334359645844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,48,4,128,1,float16,float16,0,0.31669867038726807
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,48,8,128,1,float16,float16,0,0.3135733405749003
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,fp8,0,0.09210667014122009
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,fp8,0,0.8045706748962402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,fp8,0,3.8300212224324546
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,fp8,0,4.71998945871989
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,fp8,0,6.364346822102864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,fp8,0,4.399893442789714
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,fp8,0,1.9337600072224934
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,48,2,128,1,float16,float16,0,13.552740732828775
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,48,4,128,1,float16,float16,0,15.187749226888021
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,48,1,128,1,float16,float16,0,14.533760070800781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,48,8,128,1,float16,float16,0,15.28069814046224
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,48,1,128,1,float16,float16,0,6.091077168782552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,48,48,128,1,float16,float16,0,9.057882944742838
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,fp8,0,2.077946662902832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,48,2,128,1,float16,float16,0,5.363552093505859
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,48,4,128,1,float16,float16,0,7.0667463938395185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,fp8,0,2.5313119888305664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,fp8,0,3.2114079793294272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,48,48,128,1,float16,float16,0,4.477994600931804
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,fp8,0,0.9727573394775391
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,48,1,128,1,float16,float16,0,2.926032066345215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,48,8,128,1,float16,float16,0,6.640485127766927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,fp8,0,1.0292800267537434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,fp8,0,1.165712038675944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,fp8,0,18.131248474121094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,48,2,128,1,float16,float16,0,2.7486772537231445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,48,4,128,1,float16,float16,0,2.7722400029500327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,fp8,0,1.6156694094340007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,48,8,128,1,float16,float16,0,3.038890520731608
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,fp8,0,0.4732853174209595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,48,1,128,1,float16,float16,0,1.4176373481750488
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,48,48,128,1,float16,float16,0,2.293999989827474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,48,4,128,1,float16,float16,0,1.4866080284118652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,fp8,0,0.5379093488057455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,fp8,0,0.55894935131073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,48,2,128,1,float16,float16,0,1.3698986371358235
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,48,8,128,1,float16,float16,0,1.4689119656880696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,fp8,0,8.066389083862305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,fp8,0,0.7694133122762045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,48,1,128,1,float16,float16,0,0.7089386781056722
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,48,48,128,1,float16,float16,0,1.1225600242614746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,fp8,0,0.1014453371365865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,fp8,0,3.955264091491699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,fp8,0,0.22005333503087363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,fp8,0,0.15121066570281982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,48,4,128,1,float16,float16,0,0.7760106722513834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,48,2,128,1,float16,float16,0,0.7174986998240153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,fp8,0,0.3429439862569173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,48,48,128,1,float16,float16,0,0.5900693337122599
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,48,1,128,1,float16,float16,0,0.36735999584198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,fp8,0,0.030026666820049286
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,48,8,128,1,float16,float16,0,0.7701866626739502
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,48,2,128,1,float16,float16,0,0.3670666615168254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,fp8,0,0.036661334335803986
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,fp8,0,0.9630773067474365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,fp8,0,2.023215929667155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,fp8,0,0.06623999774456024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,48,4,128,1,float16,float16,0,0.3709760109583537
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,48,8,128,1,float16,float16,0,0.3982880115509033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,fp8,0,0.10999466975529988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,48,48,128,1,float16,float16,0,0.2771040002504985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,fp8,0,0.022250667214393616
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,48,4,128,1,float16,float16,0,0.1971679925918579
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,fp8,0,0.026159999271233875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,fp8,0,0.03762666632731756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,48,2,128,1,float16,float16,0,0.202890674273173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,48,8,128,1,float16,float16,0,0.20286399126052856
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,48,1,128,1,float16,float16,0,0.1928000052769979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,fp8,0,0.06901866694291432
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,fp8,0,0.43672533830006915
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,fp8,0,5.108266512552897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,fp8,0,5.487024307250977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,fp8,0,6.254741032918294
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,48,8,128,1,float16,float16,0,15.486367543538412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,48,1,128,1,float16,float16,0,13.375738779703775
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,48,2,128,1,float16,float16,0,14.713371276855469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,fp8,0,2.740234692891439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,fp8,0,7.906922658284505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,48,1,128,1,float16,float16,0,5.631957372029622
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,48,2,128,1,float16,float16,0,6.2017866770426435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,fp8,0,2.9263413747151694
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,fp8,0,3.15283203125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,fp8,0,3.9420105616251626
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,48,8,128,1,float16,float16,0,6.23037846883138
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,48,4,128,1,float16,float16,0,15.442128499348959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,48,4,128,1,float16,float16,0,5.754218419392903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,fp8,0,1.297397295633952
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,48,1,128,1,float16,float16,0,2.551509380340576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,48,48,128,1,float16,float16,0,5.07205327351888
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,48,48,128,1,float16,float16,0,10.240975697835287
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,48,4,128,1,float16,float16,0,2.8753493626912436
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,fp8,0,1.5600426991780598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,fp8,0,17.199562072753906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,fp8,0,1.422426700592041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,48,8,128,1,float16,float16,0,2.988170623779297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,48,1,128,1,float16,float16,0,1.3124213218688965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,fp8,0,8.258853276570639
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,fp8,0,0.6377386649449667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,48,48,128,1,float16,float16,0,2.5661333401997886
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,fp8,0,0.7248000303904215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,48,2,128,1,float16,float16,0,1.3392853736877441
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,fp8,0,0.7520053386688232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,48,4,128,1,float16,float16,0,1.38702392578125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,48,8,128,1,float16,float16,0,1.5206133524576824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,fp8,0,1.0132799943288167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,48,2,128,1,float16,float16,0,2.633018652598063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,48,48,128,1,float16,float16,0,1.3057599862416585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,fp8,0,0.3277440071105957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,fp8,0,2.0856480598449707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,48,2,128,1,float16,float16,0,0.7006826400756836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,fp8,0,4.396490732828776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,48,1,128,1,float16,float16,0,0.682650645573934
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,fp8,0,0.3209279974301656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,fp8,0,0.36876265207926434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,48,48,128,1,float16,float16,0,0.6446346839269003
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,fp8,0,0.4172106583913167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,48,4,128,1,float16,float16,0,0.7702186902364095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,48,1,128,1,float16,float16,0,0.35260268052419025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,fp8,0,0.03749866783618927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,fp8,0,0.044624000787734985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,fp8,0,2.1632800102233887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,48,8,128,1,float16,float16,0,0.7679999669392904
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,fp8,0,0.12805333733558655
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,fp8,0,0.06866666674613953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,48,8,128,1,float16,float16,0,0.4023199876149495
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,48,2,128,1,float16,float16,0,0.3615413506825765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,48,4,128,1,float16,float16,0,0.39856000741322833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,48,48,128,1,float16,float16,0,0.3210879961649577
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,48,1,128,1,float16,float16,0,0.17296000321706137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,fp8,0,1.0166719754536946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,fp8,0,0.031066666046778362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,fp8,0,0.024549332757790882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,48,2,128,1,float16,float16,0,0.1785973310470581
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,48,8,128,1,float16,float16,0,0.18048532803853354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,48,4,128,1,float16,float16,0,0.1862293283144633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,48,48,128,1,float16,float16,0,0.1074666678905487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,fp8,0,0.03935466706752777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,fp8,0,0.10414399703343709
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,48,1,128,1,float16,float16,0,0.09649067123730977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,48,2,128,1,float16,float16,0,0.10074667135874431
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,fp8,0,0.06348266700903575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,fp8,0,0.48576001326243085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,48,8,128,1,float16,float16,0,0.0997866690158844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,48,4,128,1,float16,float16,0,0.10214933753013611
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,fp8,0,0.11547199885050456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,fp8,0,0.1337493360042572
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,fp8,0,0.11043199896812439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,fp8,0,0.3126133282979329
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,fp8,0,5.1117814381917315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,fp8,0,5.693306605021159
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,48,1,128,1,float16,float16,0,9.264826456705729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,48,2,128,1,float16,float16,0,9.426885604858398
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,fp8,0,6.2738189697265625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,48,4,128,1,float16,float16,0,10.078986485799154
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,48,1,128,1,float16,float16,0,3.2731892267862954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,fp8,0,7.92633056640625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,fp8,0,2.5579679807027182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,48,48,128,1,float16,float16,0,9.662874857584635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,fp8,0,2.7327359517415366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,48,2,128,1,float16,float16,0,3.3728694915771484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,fp8,0,3.1517120997111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,48,8,128,1,float16,float16,0,10.845860799153646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,48,4,128,1,float16,float16,0,3.7061281204223633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,48,48,128,1,float16,float16,0,4.840784072875977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,48,8,128,1,float16,float16,0,4.432954788208008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,fp8,0,12.397387186686197
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,fp8,0,3.952576001485189
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,fp8,0,1.2871840000152588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,48,2,128,1,float16,float16,0,1.7004159291585286
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,48,4,128,1,float16,float16,0,1.8798559506734211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,fp8,0,1.5618185997009277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,48,48,128,1,float16,float16,0,2.6459147135416665
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,fp8,0,6.336832046508789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,fp8,0,1.971461296081543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,48,1,128,1,float16,float16,0,0.8142613569895426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,48,8,128,1,float16,float16,0,2.102282683054606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,48,1,128,1,float16,float16,0,1.6109813054402669
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,fp8,0,0.6875680287679037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,fp8,0,1.45522673924764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,48,2,128,1,float16,float16,0,0.8715519905090332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,48,4,128,1,float16,float16,0,0.9149813652038574
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,48,48,128,1,float16,float16,0,1.220031976699829
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,48,1,128,1,float16,float16,0,0.42667198181152344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,fp8,0,0.6752853393554688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,48,8,128,1,float16,float16,0,1.0252532958984375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,fp8,0,3.2484747568766275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,fp8,0,0.7485493024190267
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,fp8,0,0.2993920048077901
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,48,2,128,1,float16,float16,0,0.44257601102193195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,fp8,0,1.0236000219980876
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,fp8,0,0.31884799400965375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,48,8,128,1,float16,float16,0,0.49986668427785236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,fp8,0,0.36980799833933514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,48,1,128,1,float16,float16,0,0.2228320042292277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,48,48,128,1,float16,float16,0,0.594922661781311
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,48,4,128,1,float16,float16,0,0.4749759833017985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,fp8,0,0.03697066754102707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,fp8,0,1.5827199618021648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,fp8,0,0.42633601029713947
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,fp8,0,0.042319998145103455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,48,8,128,1,float16,float16,0,0.2614026665687561
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,48,2,128,1,float16,float16,0,0.23461333910624185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,fp8,0,0.0610346645116806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,48,4,128,1,float16,float16,0,0.25117333730061847
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,48,48,128,1,float16,float16,0,0.24167466163635254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,fp8,0,0.024458666642506916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,48,2,128,1,float16,float16,0,0.10761599739392598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,fp8,0,0.02586666742960612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,fp8,0,0.12380799651145935
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,48,4,128,1,float16,float16,0,0.11318399508794148
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,48,1,128,1,float16,float16,0,0.1065066655476888
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,48,8,128,1,float16,float16,0,0.11190932989120483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,fp8,0,0.03636800001064936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,48,1,128,1,float16,float16,0,0.06238933404286703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,fp8,0,0.04479999840259552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,48,2,128,1,float16,float16,0,0.06247466802597046
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,48,48,128,1,float16,float16,0,0.07569600145022075
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,fp8,0,0.34460266431172687
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,fp8,0,0.1053653359413147
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,fp8,0,0.7169386545817057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,fp8,0,0.10539199908574422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,48,4,128,1,float16,float16,0,0.0674773355325063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,fp8,0,0.11275733510653178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,fp8,0,0.24174400170644125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,48,8,128,1,float16,float16,0,0.06935466825962067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,fp8,0,0.060005332032839455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,fp8,0,0.05982399980227152
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,48,1,128,1,float16,float16,0,0.03457066665093104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,48,2,128,1,float16,float16,0,0.035088000198205314
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,fp8,0,0.12361600001653035
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,48,48,128,1,float16,float16,0,0.039877332746982574
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,48,4,128,1,float16,float16,0,0.035887998839219414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,fp8,0,0.06458133459091187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,48,8,128,1,float16,float16,0,0.03585066646337509
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,fp8,0,0.1276693344116211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,fp8,0,0.06804800033569336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,fp8,0,2.560933272043864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,48,1,128,1,float16,float16,0,2.912250518798828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,fp8,0,2.947146733601888
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,48,4,128,1,float16,float16,0,3.5058771769205728
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,48,2,128,1,float16,float16,0,3.4145387013753257
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,fp8,0,3.3370933532714844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,fp8,0,4.0092213948567705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,48,48,128,1,float16,float16,0,4.854682604471843
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,fp8,0,1.2947039604187012
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,48,2,128,1,float16,float16,0,1.491221268971761
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,48,1,128,1,float16,float16,0,1.4012053807576497
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,fp8,0,1.367136001586914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,48,8,128,1,float16,float16,0,4.4350080490112305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,fp8,0,5.523663838704427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,fp8,0,1.6581333478291829
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,48,4,128,1,float16,float16,0,1.5426452954610188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,fp8,0,0.6913119951883951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,48,8,128,1,float16,float16,0,1.8405653635660808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,48,48,128,1,float16,float16,0,2.6417387326558432
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,fp8,0,2.0044213930765786
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,fp8,0,0.7208639780680338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,fp8,0,2.669637362162272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,48,4,128,1,float16,float16,0,0.7562080224355062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,48,2,128,1,float16,float16,0,0.729850689570109
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,fp8,0,0.7987840175628662
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,48,1,128,1,float16,float16,0,0.3404586712519328
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,48,48,128,1,float16,float16,0,1.3068586985270183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,48,8,128,1,float16,float16,0,0.8999413649241129
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,48,1,128,1,float16,float16,0,0.6333813269933065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,fp8,0,0.3049173355102539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,fp8,0,1.2249973615010579
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,48,2,128,1,float16,float16,0,0.36079466342926025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,fp8,0,1.0183200041453044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,fp8,0,0.3419946829477946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,48,4,128,1,float16,float16,0,0.3724213441212972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,fp8,0,0.3489760160446167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,48,1,128,1,float16,float16,0,0.1482186714808146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,fp8,0,0.03719466676314672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,48,48,128,1,float16,float16,0,0.6394293308258057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,fp8,0,0.41180264949798584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,48,2,128,1,float16,float16,0,0.16966933012008667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,48,8,128,1,float16,float16,0,0.4883093436559041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,fp8,0,0.042591998974482216
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,48,4,128,1,float16,float16,0,0.18607999881108603
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,fp8,0,0.058186665177345276
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,48,8,128,1,float16,float16,0,0.19936533768971762
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,48,1,128,1,float16,float16,0,0.07461866736412048
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,fp8,0,0.11385599772135417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,fp8,0,0.02593066543340683
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,fp8,0,0.024351999163627625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,48,2,128,1,float16,float16,0,0.07760000228881836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,48,4,128,1,float16,float16,0,0.08116800089677174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,fp8,0,0.03342933456103007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,fp8,0,0.2658453385035197
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,48,48,128,1,float16,float16,0,0.2418986757596334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,fp8,0,0.5415573517481486
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,fp8,0,0.10613866647084554
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,48,1,128,1,float16,float16,0,0.04298666616280874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,48,48,128,1,float16,float16,0,0.052613332867622375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,48,8,128,1,float16,float16,0,0.08257066706816356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,fp8,0,0.19235199689865112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,fp8,0,0.041797334949175514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,48,2,128,1,float16,float16,0,0.04173333446184794
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,fp8,0,0.10760533809661865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,48,1,128,1,float16,float16,0,0.024517332514127094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,48,8,128,1,float16,float16,0,0.046351999044418335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,48,4,128,1,float16,float16,0,0.0459146648645401
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,fp8,0,0.11595732967058818
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,fp8,0,0.10921066999435425
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,fp8,0,0.06074133515357971
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,48,48,128,1,float16,float16,0,0.03065066784620285
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,48,2,128,1,float16,float16,0,0.02465066562096278
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,fp8,0,0.06198399762312571
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,fp8,0,0.05992533266544342
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,48,4,128,1,float16,float16,0,0.026421333352724712
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,48,8,128,1,float16,float16,0,0.02625600000222524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,48,48,128,1,float16,float16,0,0.02000533292690913
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,fp8,0,0.06512533128261566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,fp8,0,0.03752533346414566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,48,1,128,1,float16,float16,0,0.016762666404247284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,48,4,128,1,float16,float16,0,0.017445333302021027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,48,2,128,1,float16,float16,0,0.017210666090250015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,fp8,0,0.1006666620572408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,fp8,0,0.037418665985266365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,fp8,0,0.03682666768630346
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,48,8,128,1,float16,float16,0,0.01757866640885671
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,fp8,0,0.04042666653792063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,fp8,0,0.058335999647776283
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,48,1,128,1,float16,float16,0,1.3789067268371582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,fp8,0,1.2826346556345622
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,48,2,128,1,float16,float16,0,1.586085319519043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,fp8,0,1.4700853029886882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,fp8,0,1.587663968404134
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,48,4,128,1,float16,float16,0,1.7312053044637044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,48,8,128,1,float16,float16,0,1.9762719472249348
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,48,1,128,1,float16,float16,0,0.6724906762441
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,48,48,128,1,float16,float16,0,2.439450740814209
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,fp8,0,0.6886613368988037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,fp8,0,2.1566346486409507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,48,2,128,1,float16,float16,0,0.7067466576894125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,fp8,0,0.677226702372233
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,48,4,128,1,float16,float16,0,0.7830773194630941
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,fp8,0,2.6536693572998047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,fp8,0,0.7540213267008463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,48,1,128,1,float16,float16,0,0.3378026485443115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,fp8,0,0.2884533405303955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,48,8,128,1,float16,float16,0,1.0334773063659668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,48,2,128,1,float16,float16,0,0.3766080141067505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,fp8,0,0.32181866963704425
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,48,4,128,1,float16,float16,0,0.38152531782786053
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,fp8,0,0.9574933052062988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,fp8,0,1.2512479623158772
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,fp8,0,0.34364267190297443
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,48,1,128,1,float16,float16,0,0.12176000078519185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,fp8,0,0.4450186491012573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,fp8,0,0.03770133356253306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,48,2,128,1,float16,float16,0,0.125082661708196
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,48,48,128,1,float16,float16,0,0.641157348950704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,48,8,128,1,float16,float16,0,0.5106026728947958
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,fp8,0,0.04197866717974345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,fp8,0,0.5081653197606405
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,fp8,0,0.05877333382765452
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,48,48,128,1,float16,float16,0,0.22126932938893637
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,48,48,128,1,float16,float16,0,1.30839999516805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,48,1,128,1,float16,float16,0,0.05849599838256836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,48,8,128,1,float16,float16,0,0.2015626629193624
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,48,4,128,1,float16,float16,0,0.17172267039616904
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,fp8,0,0.08746133248011272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,fp8,0,0.024117333193620045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,fp8,0,0.03342933456103007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,48,2,128,1,float16,float16,0,0.06025066475073496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,fp8,0,0.025759999950726826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,48,4,128,1,float16,float16,0,0.06519466638565063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,fp8,0,0.2188053329785665
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,48,1,128,1,float16,float16,0,0.032746667663256325
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,48,8,128,1,float16,float16,0,0.06519466638565063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,48,48,128,1,float16,float16,0,0.04286933441956838
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,fp8,0,0.038021333515644073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,fp8,0,0.019120000302791595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,48,2,128,1,float16,float16,0,0.033530667424201965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,fp8,0,0.018272000054518383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,48,4,128,1,float16,float16,0,0.03576533248027166
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,48,8,128,1,float16,float16,0,0.035930665830771126
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,fp8,0,0.02359466751416524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,fp8,0,0.022197333474953968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,fp8,0,0.082997332016627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,fp8,0,0.015301333119471868
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,48,1,128,1,float16,float16,0,0.019365333020687103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,48,2,128,1,float16,float16,0,0.019706666469573975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,48,48,128,1,float16,float16,0,0.024858665963013966
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,fp8,0,0.015482666591803232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,fp8,0,0.01674666628241539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,48,4,128,1,float16,float16,0,0.021029333273569744
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,48,8,128,1,float16,float16,0,0.020762667059898376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,48,48,128,1,float16,float16,0,0.016165333489576977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,48,1,128,1,float16,float16,0,0.0136266661187013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,fp8,0,0.0138026662170887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,48,2,128,1,float16,float16,0,0.013690666606028875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,fp8,0,0.04124266654253006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,48,4,128,1,float16,float16,0,0.01414399966597557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,fp8,0,0.013999999811251959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,fp8,0,0.014159999787807465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,48,8,128,1,float16,float16,0,0.01414399966597557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,fp8,0,0.014021333307027817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,fp8,0,0.019205333044131596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,48,1,128,1,float16,float16,0,0.009354666496316591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,fp8,0,0.02779199928045273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,48,2,128,1,float16,float16,0,0.009226666763424873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,48,4,128,1,float16,float16,0,0.009349333122372627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,fp8,0,0.013487999637921652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,48,8,128,1,float16,float16,0,0.009408000235756239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,48,48,128,1,float16,float16,0,0.013418667018413544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,fp8,0,0.0145066666106383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,fp8,0,0.6445706685384115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,fp8,0,0.6772639751434326
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,48,4,128,1,float16,float16,0,0.7843786875406901
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,48,2,128,1,float16,float16,0,0.7273333072662354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,48,1,128,1,float16,float16,0,0.6745546658833822
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,fp8,0,0.7634666760762533
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,fp8,0,0.9671520392100016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,48,8,128,1,float16,float16,0,0.981941302617391
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,48,1,128,1,float16,float16,0,0.3391306797663371
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,fp8,0,0.3102773427963257
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,48,48,128,1,float16,float16,0,1.238165299097697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,48,2,128,1,float16,float16,0,0.34513600667317706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,fp8,0,1.1799039840698242
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,fp8,0,0.3235093355178833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,48,8,128,1,float16,float16,0,0.48045865694681805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,fp8,0,0.450602650642395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,48,4,128,1,float16,float16,0,0.3822346528371175
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,fp8,0,0.34373335043589276
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,fp8,0,0.03708266715208689
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,48,2,128,1,float16,float16,0,0.1395039955774943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,fp8,0,0.04228800038496653
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,48,48,128,1,float16,float16,0,0.5932799975077311
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,48,4,128,1,float16,float16,0,0.15333867073059082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,fp8,0,0.4986079931259155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,48,8,128,1,float16,float16,0,0.18760534127553305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,fp8,0,0.05827199916044871
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,fp8,0,0.07342933118343353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,48,1,128,1,float16,float16,0,0.05420266588528951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,48,48,128,1,float16,float16,0,0.23817066351572672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,48,2,128,1,float16,float16,0,0.05596800148487091
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,fp8,0,0.025839999318122864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,48,4,128,1,float16,float16,0,0.05996266504128774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,fp8,0,0.033376000821590424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,fp8,0,0.03774933268626531
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,48,48,128,1,float16,float16,0,0.03998400022586187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,fp8,0,0.18077866236368814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,48,1,128,1,float16,float16,0,0.030581332743167877
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,48,8,128,1,float16,float16,0,0.06025066475073496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,fp8,0,0.018063999712467194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,48,8,128,1,float16,float16,0,0.03366933266321818
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,48,4,128,1,float16,float16,0,0.0337119996547699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,fp8,0,0.055104002356529236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,fp8,0,0.022128000855445862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,fp8,0,0.023573334018389385
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,48,2,128,1,float16,float16,0,0.031583999594052635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,48,48,128,1,float16,float16,0,0.022629333039124806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,48,1,128,1,float16,float16,0,0.017973333597183228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,fp8,0,0.03385066737731298
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,fp8,0,0.015290666371583939
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,fp8,0,0.02420266717672348
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,fp8,0,0.01543466622630755
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,48,2,128,1,float16,float16,0,0.01800000046690305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,fp8,0,0.01893866683046023
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,48,4,128,1,float16,float16,0,0.01921066641807556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,fp8,0,0.016789333273967106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,48,1,128,1,float16,float16,0,0.12260799606641133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,fp8,0,0.01724799970785777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,48,1,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,48,48,128,1,float16,float16,0,0.015413332730531693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,48,8,128,1,float16,float16,0,0.019205333044131596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,48,4,128,1,float16,float16,0,0.013306666165590286
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,fp8,0,0.0138026662170887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,fp8,0,0.013786666095256805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,fp8,0,0.014277332772811254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,fp8,0,0.019946667055288952
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,48,8,128,1,float16,float16,0,0.013818666338920593
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,48,48,128,1,float16,float16,0,0.013301332791646322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,fp8,0,0.014165333161751429
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,48,2,128,1,float16,float16,0,0.01303999995191892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,48,1,128,1,float16,float16,0,0.01044800008336703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,fp8,0,0.01533866673707962
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,48,2,128,1,float16,float16,0,0.010351999973257383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,48,4,128,1,float16,float16,0,0.009061333412925402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,48,8,128,1,float16,float16,0,0.010666667173306147
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,fp8,0,0.014117332796255747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,fp8,0,0.012773333738247553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,fp8,0,0.013440000514189402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,48,1,128,1,float16,float16,0,0.010527999450763067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,48,48,128,1,float16,float16,0,0.00938666673998038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,48,2,128,1,float16,float16,0,0.010506667196750641
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,48,4,128,1,float16,float16,0,0.008576000109314919
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,fp8,0,0.01381333296497663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,fp8,0,0.014576000471909841
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,48,8,128,1,float16,float16,0,0.010405333091815313
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,fp8,0,0.3224693338076274
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,fp8,0,0.4017386833826701
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,48,8,128,1,float16,float16,0,0.4851040045420329
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,48,1,128,1,float16,float16,0,0.40408531824747723
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,48,2,128,1,float16,float16,0,0.42390398184458417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,fp8,0,0.37770132223765057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,48,4,128,1,float16,float16,0,0.4596586624781291
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,48,48,128,1,float16,float16,0,0.5868693192799886
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,fp8,0,0.4907093445460002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,48,1,128,1,float16,float16,0,0.18946667512257895
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,48,2,128,1,float16,float16,0,0.20097066958745322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,fp8,0,0.04170133173465729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,fp8,0,0.0470719983180364
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,fp8,0,0.13854400316874185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,fp8,0,0.5919839938481649
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,48,4,128,1,float16,float16,0,0.22032533089319864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,48,48,128,1,float16,float16,0,0.2444480061531067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,48,8,128,1,float16,float16,0,0.2363199989000956
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,48,1,128,1,float16,float16,0,0.09236266215642293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,fp8,0,0.02775466690460841
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,fp8,0,0.029898665845394135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,48,8,128,1,float16,float16,0,0.09834667046864827
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,fp8,0,0.2730613350868225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,fp8,0,0.07547733187675476
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,fp8,0,0.03836799909671148
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,48,48,128,1,float16,float16,0,0.059279998143514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,fp8,0,0.05249066650867462
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,48,4,128,1,float16,float16,0,0.09797867139180501
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,48,4,128,1,float16,float16,0,0.05115200082461039
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,fp8,0,0.0958133339881897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,48,2,128,1,float16,float16,0,0.04920533299446106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,fp8,0,0.023034666975339253
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,48,8,128,1,float16,float16,0,0.0514933317899704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,fp8,0,0.02179199953873952
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,fp8,0,0.026127999027570088
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,48,48,128,1,float16,float16,0,0.032773333291212715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,48,2,128,1,float16,float16,0,0.0941546658674876
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,48,1,128,1,float16,float16,0,0.027957332630952198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,48,1,128,1,float16,float16,0,0.04811733464399973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,fp8,0,0.01995733380317688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,48,2,128,1,float16,float16,0,0.027855999767780304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,fp8,0,0.028725333511829376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,48,8,128,1,float16,float16,0,0.029103999336560566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,fp8,0,0.020506666352351505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,fp8,0,0.01907733331123988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,48,4,128,1,float16,float16,0,0.02897600084543228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,fp8,0,0.052383999029795326
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,48,48,128,1,float16,float16,0,0.018415999909241993
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,fp8,0,0.021557333568731945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,fp8,0,0.01825599993268649
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,48,2,128,1,float16,float16,0,0.016303999970356624
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,48,1,128,1,float16,float16,0,0.0162773331006368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,fp8,0,0.017994667092959087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,fp8,0,0.018266666680574417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,48,8,128,1,float16,float16,0,0.016634666671355564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,fp8,0,0.01762666677435239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,48,4,128,1,float16,float16,0,0.01643199970324834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,48,48,128,1,float16,float16,0,0.013258667041858038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,fp8,0,0.03443733354409536
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,48,1,128,1,float16,float16,0,0.012058666596810022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,fp8,0,0.01752000053723653
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,48,4,128,1,float16,float16,0,0.012256000190973282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,48,2,128,1,float16,float16,0,0.012042666474978128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,fp8,0,0.01766933376590411
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,48,8,128,1,float16,float16,0,0.012170666207869848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,fp8,0,0.017701332767804463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,48,1,128,1,float16,float16,0,0.008314666648705801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,fp8,0,0.02014933278163274
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,48,48,128,1,float16,float16,0,0.009002666920423508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,fp8,0,0.01681600014368693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,48,2,128,1,float16,float16,0,0.008373333141207695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,fp8,0,0.017317333569129307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,48,4,128,1,float16,float16,0,0.008453333129485449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,48,8,128,1,float16,float16,0,0.008538666491707167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,fp8,0,0.017445333302021027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,48,1,128,1,float16,float16,0,0.008261333530147871
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,fp8,0,0.017397332936525345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,fp8,0,0.018885333091020584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,48,48,128,1,float16,float16,0,0.013663999736309052
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,fp8,0,0.016399999459584553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,48,2,128,1,float16,float16,0,0.008037333066264788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,fp8,0,0.016821333517630894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,48,4,128,1,float16,float16,0,0.008341333518425623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,fp8,0,0.01829333355029424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,48,8,128,1,float16,float16,0,0.00816000004609426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,fp8,0,0.018288000176350277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,48,1,128,1,float16,float16,0,0.35174401601155597
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,fp8,0,0.04363733530044556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,48,2,128,1,float16,float16,0,0.35901331901550293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,fp8,0,0.06121066709359487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,48,4,128,1,float16,float16,0,0.3763680060704549
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,fp8,0,0.10429867108662923
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,48,8,128,1,float16,float16,0,0.39235734939575195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,48,48,128,1,float16,float16,0,0.30266666412353516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,48,1,128,1,float16,float16,0,0.16924800475438437
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,fp8,0,0.21437867482503256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,fp8,0,0.028250666956106823
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,48,2,128,1,float16,float16,0,0.17107733090718588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,fp8,0,0.3801279862721761
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,48,8,128,1,float16,float16,0,0.1758133371671041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,fp8,0,0.03242666771014532
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,48,4,128,1,float16,float16,0,0.17486933867136636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,48,48,128,1,float16,float16,0,0.09596266349156697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,fp8,0,0.07576533158620198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,fp8,0,0.021744000415007275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,fp8,0,0.05204799771308899
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,fp8,0,0.0232640008131663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,48,2,128,1,float16,float16,0,0.08729599912961324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,fp8,0,0.15055466691652933
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,48,4,128,1,float16,float16,0,0.08940266569455464
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,48,48,128,1,float16,float16,0,0.04995200037956238
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,48,8,128,1,float16,float16,0,0.08957333366076152
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,48,1,128,1,float16,float16,0,0.08638399839401245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,fp8,0,0.027957332630952198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,fp8,0,0.04193066557248434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,fp8,0,0.020901332298914593
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,fp8,0,0.019088000059127808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,48,4,128,1,float16,float16,0,0.0462773342927297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,fp8,0,0.019023999571800232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,48,2,128,1,float16,float16,0,0.045381332437197365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,fp8,0,0.0811359981695811
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,48,48,128,1,float16,float16,0,0.028746667007605236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,48,8,128,1,float16,float16,0,0.046384001771608986
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,48,1,128,1,float16,float16,0,0.045423999428749084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,fp8,0,0.023050665855407715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,48,1,128,1,float16,float16,0,0.02807466685771942
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,fp8,0,0.018415999909241993
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,48,8,128,1,float16,float16,0,0.026736001173655193
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,48,4,128,1,float16,float16,0,0.026821332673231762
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,fp8,0,0.017637333522240322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,fp8,0,0.017711999515692394
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,48,2,128,1,float16,float16,0,0.026426665484905243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,48,48,128,1,float16,float16,0,0.01651200031240781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,fp8,0,0.04430399835109711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,fp8,0,0.017583999782800674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,fp8,0,0.018346666047970455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,48,2,128,1,float16,float16,0,0.015578666081031164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,fp8,0,0.017530667285124462
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,48,1,128,1,float16,float16,0,0.015615999698638916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,48,8,128,1,float16,float16,0,0.01573333392540614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,fp8,0,0.03155199935038885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,48,1,128,1,float16,float16,0,0.011717333147923151
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,48,4,128,1,float16,float16,0,0.01552533358335495
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,48,48,128,1,float16,float16,0,0.012282667060693106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,fp8,0,0.016762666404247284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,fp8,0,0.017338667064905167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,48,4,128,1,float16,float16,0,0.011605333536863327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,fp8,0,0.017456000049908955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,fp8,0,0.016565332810084026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,fp8,0,0.017194667210181553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,48,8,128,1,float16,float16,0,0.01173866664369901
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,48,2,128,1,float16,float16,0,0.011594666788975397
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,fp8,0,0.01979200045267741
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,48,48,128,1,float16,float16,0,0.008661333471536636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,48,1,128,1,float16,float16,0,0.00810666692753633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,48,2,128,1,float16,float16,0,0.008047999814152718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,fp8,0,0.016938666502634685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,fp8,0,0.018613333503405254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,fp8,0,0.01725333308180173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,48,48,128,1,float16,float16,0,0.013061333447694778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,48,8,128,1,float16,float16,0,0.00814933329820633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,48,4,128,1,float16,float16,0,0.008037333066264788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,fp8,0,0.01618133361140887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,48,2,128,1,float16,float16,0,0.007941333577036858
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,48,4,128,1,float16,float16,0,0.008330666770537695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,48,1,128,1,float16,float16,0,0.00795199970404307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,fp8,0,0.016943999876578648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,48,8,128,1,float16,float16,0,0.00854399986565113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,fp8,0,0.01701333373785019
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,fp8,0,0.017770666629076004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,fp8,0,0.018218666315078735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,48,1,128,1,float16,float16,0,0.014826666563749313
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,48,1,128,1,float16,fp8,0,0.01894933357834816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,48,2,128,1,float16,float16,0,0.029018667836983997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,48,4,128,1,float16,float16,0,0.04271999994913737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,0,0.036848001182079315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,48,2,128,1,float16,fp8,0,0.024901332954565685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,0,0.05619200070699056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,48,8,128,1,float16,float16,0,0.06723199784755707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,48,48,128,1,float16,float16,0,0.15400532881418863
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,48,1,128,1,float16,float16,0,0.010064000263810158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,48,1,128,1,float16,fp8,0,0.014357333381970724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,48,2,128,1,float16,float16,0,0.0189280000825723
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,48,4,128,1,float16,float16,0,0.025909334421157837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,48,2,128,1,float16,fp8,0,0.01629866659641266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,48,48,128,1,float16,float16,0,0.07976533472537994
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,48,8,128,1,float16,float16,0,0.03957866628964742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,fp8,0,0.13354667027791342
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,0,0.022197333474953968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,0,0.03374933451414108
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,48,1,128,1,float16,float16,0,0.00978133330742518
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,48,1,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,fp8,0,0.07286933561166127
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,48,2,128,1,float16,float16,0,0.014080000420411428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,48,8,128,1,float16,float16,0,0.024703999360402424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,48,2,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,48,4,128,1,float16,float16,0,0.017781333376963932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,48,1,128,1,float16,float16,0,0.009322666873534521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,0,0.021183999876181286
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,48,48,128,1,float16,float16,0,0.04204266766707102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,48,2,128,1,float16,float16,0,0.013093333691358566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,48,1,128,1,float16,fp8,0,0.01228800043463707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,0,0.012373333175977072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,48,2,128,1,float16,fp8,0,0.012261333564917246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,48,4,128,1,float16,float16,0,0.013381333400805792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,fp8,0,0.04228800038496653
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,48,1,128,1,float16,float16,0,0.009162666896979014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,48,8,128,1,float16,float16,0,0.01701333373785019
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,48,48,128,1,float16,float16,0,0.025253333151340485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,48,1,128,1,float16,fp8,0,0.011674666156371435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,48,2,128,1,float16,float16,0,0.012831999609867731
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,48,2,128,1,float16,fp8,0,0.011765333513418833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,0,0.014405333747466406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,48,4,128,1,float16,float16,0,0.012853333105643591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,0,0.011898666620254517
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,48,48,128,1,float16,float16,0,0.01523200049996376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,48,8,128,1,float16,float16,0,0.013093333691358566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,48,1,128,1,float16,float16,0,0.008986666798591614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,0,0.013855999956528345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,fp8,0,0.02794666588306427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,48,1,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,48,2,128,1,float16,float16,0,0.01259200026591619
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,48,2,128,1,float16,fp8,0,0.011440000186363855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,48,4,128,1,float16,float16,0,0.01267733300725619
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,0,0.011402666568756104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,48,8,128,1,float16,float16,0,0.012863999853531519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,48,1,128,1,float16,float16,0,0.008879999940594038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,48,48,128,1,float16,float16,0,0.011514666179815928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,0,0.013370666652917862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,48,1,128,1,float16,fp8,0,0.011328000575304031
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,48,2,128,1,float16,float16,0,0.012240000069141388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,fp8,0,0.021738665799299877
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,48,4,128,1,float16,float16,0,0.01239466667175293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,48,8,128,1,float16,float16,0,0.01240533341964086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,48,2,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,0,0.01146666705608368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,48,1,128,1,float16,fp8,0,0.010821333775917688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,48,48,128,1,float16,float16,0,0.008410666758815447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,fp8,0,0.01693333312869072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,48,2,128,1,float16,float16,0,0.012144000579913458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,48,2,128,1,float16,fp8,0,0.011120000233252844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,48,1,128,1,float16,float16,0,0.008896000062425932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,48,4,128,1,float16,float16,0,0.012138667205969492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,48,8,128,1,float16,float16,0,0.012074666718641916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,48,1,128,1,float16,float16,0,0.008805333326260248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,0,0.011274666835864386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,fp8,0,0.016208000481128693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,48,1,128,1,float16,fp8,0,0.010687999427318573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,0,0.011274666835864386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,48,2,128,1,float16,float16,0,0.008538666491707167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,48,48,128,1,float16,float16,0,0.008314666648705801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,48,4,128,1,float16,float16,0,0.008757333581646284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,48,2,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,48,8,128,1,float16,float16,0,0.008650666723648706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,fp8,0,0.014592000593741735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,fp8,0,4.611418724060059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,fp8,0,5.778341293334961
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,fp8,0,9.638373057047525
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,fp8,0,26.63916778564453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,fp8,0,2.323925336201986
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,40,40,128,1,float16,float16,0,70.74328104654948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,fp8,0,3.098479906717936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,40,1,128,1,float16,float16,0,64.45499674479167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,fp8,0,4.795968055725098
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,40,2,128,1,float16,float16,0,67.62880452473958
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,fp8,0,14.063162485758463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,40,1,128,1,float16,float16,0,136.7109375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,40,2,128,1,float16,float16,0,136.80502319335938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,40,4,128,1,float16,float16,0,140.54515584309897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,40,8,128,1,float16,float16,0,140.14651489257812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,fp8,0,1.188442627588908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,fp8,0,1.451327959696452
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,40,4,128,1,float16,float16,0,67.64746602376302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,40,40,128,1,float16,float16,0,34.822591145833336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,fp8,0,2.4619253476460776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,40,8,128,1,float16,float16,0,66.47286478678386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,fp8,0,145.99910481770834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,fp8,0,6.88754145304362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,40,1,128,1,float16,float16,0,33.26055908203125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,fp8,0,0.5977973143259684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,40,2,128,1,float16,float16,0,34.78375498453776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,fp8,0,0.8102880318959554
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,40,4,128,1,float16,float16,0,33.51153564453125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,40,40,128,1,float16,float16,0,18.160964965820312
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,fp8,0,1.3586613337198894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,40,1,128,1,float16,float16,0,17.448319753011067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,fp8,0,3.129103978474935
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,40,2,128,1,float16,float16,0,16.94063440958659
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,fp8,0,3.410917282104492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,40,8,128,1,float16,float16,0,34.89686838785807
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,40,4,128,1,float16,float16,0,17.41367467244466
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,fp8,0,4.11360518137614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,40,8,128,1,float16,float16,0,16.648794809977215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,fp8,0,6.279792149861653
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,fp8,0,38.12793477376302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,fp8,0,73.08585611979167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,fp8,0,15.446980794270834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,fp8,0,1.7083733876546223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,40,40,128,1,float16,float16,0,39.96980285644531
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,fp8,0,2.166858673095703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,40,1,128,1,float16,float16,0,38.13348388671875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,40,2,128,1,float16,float16,0,38.08315785725912
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,fp8,0,3.7344481150309243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,40,1,128,1,float16,float16,0,80.8912862141927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,fp8,0,7.81550407409668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,40,2,128,1,float16,float16,0,77.6272684733073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,40,4,128,1,float16,float16,0,78.13476053873698
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,fp8,0,0.8850133419036865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,40,8,128,1,float16,float16,0,79.20973714192708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,fp8,0,1.0731680393218994
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,40,40,128,1,float16,float16,0,20.056480407714844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,40,1,128,1,float16,float16,0,19.06944529215495
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,40,4,128,1,float16,float16,0,39.15284729003906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,fp8,0,1.7021066347757976
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,fp8,0,84.79685974121094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,40,2,128,1,float16,float16,0,19.17101287841797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,fp8,0,4.4869333902994795
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,40,8,128,1,float16,float16,0,40.19317372639974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,fp8,0,0.42611201604207355
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,40,4,128,1,float16,float16,0,21.446917215983074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,fp8,0,0.5318986574808756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,fp8,0,0.83350936571757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,40,40,128,1,float16,float16,0,10.474704106648764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,40,1,128,1,float16,float16,0,8.973450978597006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,fp8,0,2.018517335255941
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,40,2,128,1,float16,float16,0,9.053871790568033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,40,8,128,1,float16,float16,0,19.02947743733724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,fp8,0,39.705963134765625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,40,4,128,1,float16,float16,0,10.063599904378256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,40,8,128,1,float16,float16,0,9.466618855794271
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,fp8,0,3.004192034403483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,fp8,0,3.3351093928019204
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,fp8,0,4.966490745544434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,fp8,0,21.57561492919922
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,fp8,0,11.357893625895182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,fp8,0,1.5085867245992024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,40,40,128,1,float16,float16,0,28.20269775390625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,40,1,128,1,float16,float16,0,27.341280619303387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,fp8,0,1.768074671427409
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,fp8,0,2.590768019358317
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,40,2,128,1,float16,float16,0,26.920420328776043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,40,1,128,1,float16,float16,0,53.28754679361979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,40,2,128,1,float16,float16,0,54.85531107584635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,fp8,0,5.647541046142578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,40,4,128,1,float16,float16,0,55.21993509928385
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,40,8,128,1,float16,float16,0,53.87214152018229
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,fp8,0,0.7188586393992106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,40,4,128,1,float16,float16,0,27.751841227213543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,fp8,0,0.8847413063049316
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,40,40,128,1,float16,float16,0,14.465211232503256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,fp8,0,58.61168416341146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,40,1,128,1,float16,float16,0,13.041984558105469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,40,8,128,1,float16,float16,0,26.66742451985677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,fp8,0,1.3607734044392903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,40,2,128,1,float16,float16,0,13.92526880900065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,fp8,0,0.36237867673238117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,fp8,0,2.9654668172200522
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,fp8,0,0.4463786681493123
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,40,40,128,1,float16,float16,0,6.4227949778238935
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,40,4,128,1,float16,float16,0,13.20974349975586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,40,1,128,1,float16,float16,0,5.898576100667317
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,fp8,0,0.6897173722585043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,40,2,128,1,float16,float16,0,6.359530766805013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,40,4,128,1,float16,float16,0,5.72047487894694
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,fp8,0,1.5841439565022786
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,40,8,128,1,float16,float16,0,14.715269724527994
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,fp8,0,29.835433959960938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,fp8,0,4.481162707010905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,fp8,0,14.373818715413412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,40,8,128,1,float16,float16,0,6.114773432413737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,fp8,0,5.433450698852539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,fp8,0,7.2711842854817705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,fp8,0,15.612869262695312
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,fp8,0,2.247445265452067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,40,40,128,1,float16,float16,0,37.6585439046224
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,fp8,0,2.7497971852620444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,40,1,128,1,float16,float16,0,35.8447519938151
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,fp8,0,3.706618626912435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,40,2,128,1,float16,float16,0,33.99228159586588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,fp8,0,7.895952224731445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,40,1,128,1,float16,float16,0,73.64047749837239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,40,2,128,1,float16,float16,0,72.36033121744792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,40,4,128,1,float16,float16,0,73.30732727050781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,40,8,128,1,float16,float16,0,73.5628153483073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,fp8,0,1.1316853364308674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,fp8,0,1.2999893029530842
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,40,4,128,1,float16,float16,0,35.69543965657552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,fp8,0,74.39885965983073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,fp8,0,1.9116640090942383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,40,40,128,1,float16,float16,0,18.93361536661784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,40,1,128,1,float16,float16,0,16.958890279134113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,40,8,128,1,float16,float16,0,35.87549845377604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,fp8,0,3.853893280029297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,fp8,0,0.5953546762466431
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,40,2,128,1,float16,float16,0,18.559642791748047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,fp8,0,0.6677546501159668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,40,4,128,1,float16,float16,0,18.213695526123047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,40,40,128,1,float16,float16,0,8.840986887613932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,40,1,128,1,float16,float16,0,8.733194351196289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,fp8,0,1.110378662745158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,fp8,0,2.130352020263672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,40,2,128,1,float16,float16,0,7.871866861979167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,40,8,128,1,float16,float16,0,18.560218811035156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,40,40,128,1,float16,float16,0,4.114000002543132
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,fp8,0,0.20111999909083048
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,40,4,128,1,float16,float16,0,8.485045115152994
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,fp8,0,0.3181599974632263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,40,8,128,1,float16,float16,0,8.852650960286459
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,40,1,128,1,float16,float16,0,3.856133460998535
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,fp8,0,0.5342186689376831
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,fp8,0,18.600016276041668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,fp8,0,37.07709248860677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,fp8,0,1.0723786354064941
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,40,2,128,1,float16,float16,0,3.4408534367879233
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,40,4,128,1,float16,float16,0,3.8255465825398765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,40,8,128,1,float16,float16,0,3.7552693684895835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,fp8,0,3.334005355834961
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,fp8,0,8.761162439982096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,fp8,0,3.7525227864583335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,fp8,0,5.0782772699991865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,fp8,0,9.322101593017578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,fp8,0,1.6686612764994304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,40,1,128,1,float16,float16,0,19.993061065673828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,40,40,128,1,float16,float16,0,23.940523783365887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,fp8,0,1.8967413902282715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,fp8,0,2.6201705932617188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,40,2,128,1,float16,float16,0,19.684613545735676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,40,1,128,1,float16,float16,0,39.24098205566406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,40,2,128,1,float16,float16,0,41.85838826497396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,40,4,128,1,float16,float16,0,41.24325815836588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,fp8,0,4.7900800704956055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,40,8,128,1,float16,float16,0,41.6825917561849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,fp8,0,0.8333173592885336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,fp8,0,0.9472800095876058
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,40,40,128,1,float16,float16,0,10.53160031636556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,40,4,128,1,float16,float16,0,21.233013153076172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,fp8,0,1.344773292541504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,fp8,0,43.71954345703125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,40,8,128,1,float16,float16,0,20.153087615966797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,40,1,128,1,float16,float16,0,9.825504302978516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,40,2,128,1,float16,float16,0,10.13316281636556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,40,4,128,1,float16,float16,0,9.344847997029623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,fp8,0,0.4215039809544881
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,fp8,0,2.494373321533203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,fp8,0,0.48865067958831787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,40,1,128,1,float16,float16,0,4.536783854166667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,40,40,128,1,float16,float16,0,4.781797409057617
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,fp8,0,0.7008907000223795
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,40,2,128,1,float16,float16,0,4.582714716593425
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,40,8,128,1,float16,float16,0,10.105264027913412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,fp8,0,1.2614453633626301
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,40,4,128,1,float16,float16,0,3.9139200846354165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,40,40,128,1,float16,float16,0,2.5209439595540366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,40,8,128,1,float16,float16,0,4.297333399454753
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,fp8,0,0.07769066592057546
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,fp8,0,21.913920084635418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,fp8,0,10.648239771525065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,fp8,0,0.11624000469843547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,40,1,128,1,float16,float16,0,2.0232532819112143
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,fp8,0,0.28885332743326825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,40,2,128,1,float16,float16,0,2.121338685353597
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,fp8,0,0.6789920330047607
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,40,4,128,1,float16,float16,0,1.981887976328532
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,40,8,128,1,float16,float16,0,2.097205320994059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,fp8,0,5.059871991475423
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,fp8,0,4.80950927734375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,fp8,0,4.637776056925456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,fp8,0,6.468629201253255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,fp8,0,10.667989095052084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,40,40,128,1,float16,float16,0,21.80121103922526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,40,1,128,1,float16,float16,0,19.12884267171224
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,fp8,0,2.313770612080892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,fp8,0,2.4422133763631186
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,fp8,0,3.1289332707722983
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,40,2,128,1,float16,float16,0,39.69549814860026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,40,1,128,1,float16,float16,0,38.510693868001304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,40,4,128,1,float16,float16,0,38.452901204427086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,40,8,128,1,float16,float16,0,40.9367421468099
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,fp8,0,5.411893208821614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,fp8,0,1.1014186541239421
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,40,2,128,1,float16,float16,0,18.939178466796875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,fp8,0,1.2952053546905518
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,40,4,128,1,float16,float16,0,18.658186594645183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,40,40,128,1,float16,float16,0,9.855946858723959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,40,1,128,1,float16,float16,0,8.74024518330892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,fp8,0,41.09547678629557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,fp8,0,1.6504106521606445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,40,8,128,1,float16,float16,0,18.910821278889973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,40,2,128,1,float16,float16,0,9.431866963704428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,fp8,0,2.638597329457601
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,fp8,0,0.5510506629943848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,40,40,128,1,float16,float16,0,4.75767453511556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,40,4,128,1,float16,float16,0,8.770495732625326
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,fp8,0,0.6098186572392782
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,fp8,0,0.8339146773020426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,40,1,128,1,float16,float16,0,3.6216586430867515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,fp8,0,19.962117513020832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,40,2,128,1,float16,float16,0,3.8039681116739907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,40,8,128,1,float16,float16,0,8.338709513346354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,fp8,0,1.3246560096740723
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,40,8,128,1,float16,float16,0,3.823221206665039
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,40,4,128,1,float16,float16,0,3.6772588094075522
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,fp8,0,0.17278399070103964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,40,1,128,1,float16,float16,0,2.144890626271566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,fp8,0,0.26071999470392865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,40,40,128,1,float16,float16,0,2.3770453135172525
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,fp8,0,0.4133066733678182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,40,2,128,1,float16,float16,0,1.7537493705749512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,fp8,0,0.6547786792119344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,fp8,0,9.860517501831055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,40,8,128,1,float16,float16,0,1.9511680603027344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,40,4,128,1,float16,float16,0,1.8833279609680176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,fp8,0,0.046767999728520714
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,40,1,128,1,float16,float16,0,0.8876746495564779
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,40,40,128,1,float16,float16,0,1.1858987013498943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,fp8,0,0.06681066751480103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,fp8,0,0.12693867087364197
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,fp8,0,4.3897600173950195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,40,4,128,1,float16,float16,0,0.9202133019765218
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,fp8,0,0.3057120045026143
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,40,8,128,1,float16,float16,0,0.9394773642222086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,40,2,128,1,float16,float16,0,1.0057919820149739
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,fp8,0,2.273130734761556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,fp8,0,3.252047856648763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,fp8,0,3.800175984700521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,fp8,0,4.421610514322917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,fp8,0,7.417472203572591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,fp8,0,1.7489387194315593
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,40,1,128,1,float16,float16,0,10.467456181844076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,40,40,128,1,float16,float16,0,11.746490478515625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,fp8,0,1.7991092999776204
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,40,1,128,1,float16,float16,0,22.010111490885418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,40,4,128,1,float16,float16,0,22.39044698079427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,40,2,128,1,float16,float16,0,22.3177007039388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,fp8,0,2.229423999786377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,40,8,128,1,float16,float16,0,22.332687377929688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,fp8,0,3.501162528991699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,40,2,128,1,float16,float16,0,10.161578496297201
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,fp8,0,0.8197759787241617
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,40,4,128,1,float16,float16,0,10.59009043375651
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,40,40,128,1,float16,float16,0,5.75212287902832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,40,1,128,1,float16,float16,0,4.126986821492513
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,fp8,0,0.8899412949879965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,fp8,0,23.865636189778645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,fp8,0,1.165999968846639
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,40,8,128,1,float16,float16,0,10.5283571879069
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,40,2,128,1,float16,float16,0,4.4986826578776045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,fp8,0,1.850602626800537
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,40,4,128,1,float16,float16,0,4.791738510131836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,fp8,0,11.450010935465494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,40,40,128,1,float16,float16,0,2.8608853022257485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,fp8,0,0.4307839870452881
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,40,1,128,1,float16,float16,0,2.1717440287272134
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,40,8,128,1,float16,float16,0,4.490357398986816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,fp8,0,0.4634079933166504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,fp8,0,0.5622666676839193
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,fp8,0,0.9608000119527181
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,40,2,128,1,float16,float16,0,2.056005318959554
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,40,4,128,1,float16,float16,0,2.0964852968851724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,40,40,128,1,float16,float16,0,1.4208159446716309
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,fp8,0,0.06307733555634816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,40,8,128,1,float16,float16,0,2.196928024291992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,40,1,128,1,float16,float16,0,1.1410986582438152
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,fp8,0,5.396986643473308
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,40,2,128,1,float16,float16,0,1.0479626655578613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,fp8,0,0.1859626571337382
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,fp8,0,0.08800533413887024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,40,4,128,1,float16,float16,0,1.0676906903584797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,fp8,0,0.47327999273935956
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,fp8,0,0.0360000009338061
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,40,40,128,1,float16,float16,0,0.7328853607177734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,40,1,128,1,float16,float16,0,0.5371466477711996
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,40,8,128,1,float16,float16,0,1.1860960324605305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,fp8,0,2.6940107345581055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,fp8,0,0.04499199986457825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,40,2,128,1,float16,float16,0,0.5728746652603149
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,40,8,128,1,float16,float16,0,0.538538654645284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,40,4,128,1,float16,float16,0,0.5494079987208048
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,fp8,0,1.3856372833251953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,fp8,0,0.0942186713218689
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,fp8,0,0.19946134090423584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,fp8,0,4.637392044067383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,fp8,0,5.870037078857422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,fp8,0,4.932661374409993
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,fp8,0,8.438639958699545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,40,1,128,1,float16,float16,0,21.415791829427082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,40,4,128,1,float16,float16,0,22.225262959798176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,40,1,128,1,float16,float16,0,9.470656077067057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,40,8,128,1,float16,float16,0,22.041664123535156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,40,40,128,1,float16,float16,0,12.706000010172525
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,40,2,128,1,float16,float16,0,21.62999979654948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,fp8,0,2.3675840695699057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,fp8,0,2.8226613998413086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,fp8,0,2.313360055287679
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,fp8,0,4.070032119750977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,40,2,128,1,float16,float16,0,10.058607737223307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,40,4,128,1,float16,float16,0,9.669445037841797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,fp8,0,1.1573066711425781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,40,1,128,1,float16,float16,0,3.9515466690063477
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,40,8,128,1,float16,float16,0,10.300799687703451
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,fp8,0,1.1818880240122478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,fp8,0,24.01269276936849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,40,40,128,1,float16,float16,0,6.091072082519531
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,fp8,0,1.421770731608073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,40,2,128,1,float16,float16,0,4.357445398966472
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,40,4,128,1,float16,float16,0,4.047365188598633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,fp8,0,0.5363786617914835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,fp8,0,2.085103988647461
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,fp8,0,11.131231943766275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,40,8,128,1,float16,float16,0,4.657621383666992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,40,1,128,1,float16,float16,0,1.8790133794148762
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,fp8,0,0.582314650217692
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,40,2,128,1,float16,float16,0,1.981317361195882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,40,4,128,1,float16,float16,0,1.9525440533955891
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,40,40,128,1,float16,float16,0,2.9399840037027993
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,fp8,0,0.7227413654327393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,40,40,128,1,float16,float16,0,1.4862027168273926
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,fp8,0,1.0720853010813396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,40,1,128,1,float16,float16,0,0.9924373626708984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,40,8,128,1,float16,float16,0,2.227519989013672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,40,2,128,1,float16,float16,0,0.9861120382944742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,fp8,0,5.3350880940755205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,fp8,0,0.16114133596420288
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,fp8,0,0.32974932591120404
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,fp8,0,0.25120000044504803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,40,40,128,1,float16,float16,0,0.7718400160471598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,40,4,128,1,float16,float16,0,1.0709493160247803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,40,8,128,1,float16,float16,0,1.0454613367716472
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,fp8,0,0.4813813368479411
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,40,1,128,1,float16,float16,0,0.4872426589330037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,fp8,0,0.03664000084002813
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,40,4,128,1,float16,float16,0,0.5198826789855957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,fp8,0,0.08375466863314311
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,fp8,0,0.04696533580621084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,fp8,0,2.5766612688700357
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,40,8,128,1,float16,float16,0,0.544160008430481
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,fp8,0,0.16891199350357056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,40,2,128,1,float16,float16,0,0.5296586751937866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,40,40,128,1,float16,float16,0,0.36260799566904706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,40,1,128,1,float16,float16,0,0.2746559977531433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,fp8,0,0.02903466671705246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,40,2,128,1,float16,float16,0,0.2665226658185323
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,fp8,0,1.299301306406657
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,fp8,0,0.03389866650104523
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,fp8,0,0.051407997806866966
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,40,8,128,1,float16,float16,0,0.27461334069569904
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,40,4,128,1,float16,float16,0,0.2807946602503459
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,fp8,0,0.659226655960083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,fp8,0,0.11213866869608562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,fp8,0,3.475722630818685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,fp8,0,3.509402592976888
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,fp8,0,4.413605372111003
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,40,1,128,1,float16,float16,0,11.2260373433431
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,40,4,128,1,float16,float16,0,12.317253112792969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,40,8,128,1,float16,float16,0,13.356005350748697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,fp8,0,5.749386469523112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,40,2,128,1,float16,float16,0,11.640368143717447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,40,1,128,1,float16,float16,0,5.079050699869792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,40,40,128,1,float16,float16,0,7.54203732808431
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,fp8,0,1.757797400156657
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,fp8,0,1.7515519460042317
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,40,2,128,1,float16,float16,0,4.655973434448242
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,fp8,0,3.0535786946614585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,40,8,128,1,float16,float16,0,5.287866592407227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,40,40,128,1,float16,float16,0,3.889237403869629
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,fp8,0,14.235248565673828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,fp8,0,2.2055946985880532
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,40,4,128,1,float16,float16,0,5.0377546946207685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,fp8,0,0.8780852953592936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,fp8,0,0.9327786763509115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,40,2,128,1,float16,float16,0,2.2523892720540366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,40,1,128,1,float16,float16,0,2.2010293006896973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,40,4,128,1,float16,float16,0,2.5517120361328125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,fp8,0,6.643477121988933
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,40,8,128,1,float16,float16,0,2.5200106302897134
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,fp8,0,1.0722880363464355
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,fp8,0,1.5218346913655598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,fp8,0,0.42521599928538006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,fp8,0,0.42505598068237305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,fp8,0,0.5079786777496338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,40,1,128,1,float16,float16,0,1.130400021870931
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,40,2,128,1,float16,float16,0,1.3762987454732258
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,40,4,128,1,float16,float16,0,1.2539093494415283
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,fp8,0,0.6810133457183838
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,40,40,128,1,float16,float16,0,2.038479963938395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,40,8,128,1,float16,float16,0,1.2762347062428792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,40,40,128,1,float16,float16,0,0.9821759859720866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,40,2,128,1,float16,float16,0,0.604634682337443
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,40,1,128,1,float16,float16,0,0.6149066686630249
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,fp8,0,0.05054399867852529
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,fp8,0,3.337498664855957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,fp8,0,0.06863999863465627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,40,4,128,1,float16,float16,0,0.626746654510498
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,fp8,0,0.14493866761525473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,40,8,128,1,float16,float16,0,0.6371253331502279
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,fp8,0,1.6180853843688965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,fp8,0,0.336575984954834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,40,40,128,1,float16,float16,0,0.4859573443730672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,fp8,0,0.03998400022586187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,40,2,128,1,float16,float16,0,0.30300267537434894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,fp8,0,0.03143466760714849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,fp8,0,0.07425066828727722
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,40,1,128,1,float16,float16,0,0.3073546687761943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,40,4,128,1,float16,float16,0,0.3112373352050781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,fp8,0,0.7774879932403564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,fp8,0,0.12371733784675598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,40,8,128,1,float16,float16,0,0.3160960078239441
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,40,40,128,1,float16,float16,0,0.1834933360417684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,40,1,128,1,float16,float16,0,0.1692906618118286
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,fp8,0,0.022965334355831146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,fp8,0,0.02958933264017105
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,40,4,128,1,float16,float16,0,0.17258665959040323
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,fp8,0,0.0391146664818128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,40,2,128,1,float16,float16,0,0.1702453295389811
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,40,8,128,1,float16,float16,0,0.1732906699180603
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,fp8,0,0.07745066781838734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,fp8,0,0.37302935123443604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,fp8,0,4.6332747141520185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,fp8,0,7.580223719278972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,fp8,0,4.687445322672526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,40,8,128,1,float16,float16,0,12.920890808105469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,fp8,0,5.495882670084636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,40,2,128,1,float16,float16,0,12.887519836425781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,40,1,128,1,float16,float16,0,12.501407623291016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,fp8,0,2.181061267852783
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,40,4,128,1,float16,float16,0,11.327835083007812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,40,1,128,1,float16,float16,0,4.547744115193685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,fp8,0,2.3322879473368325
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,40,40,128,1,float16,float16,0,8.376117070515951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,40,2,128,1,float16,float16,0,4.4628801345825195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,fp8,0,3.5708694458007812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,fp8,0,2.777205467224121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,40,4,128,1,float16,float16,0,5.006661415100098
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,fp8,0,1.0921759605407715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,40,40,128,1,float16,float16,0,4.226261456807454
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,40,1,128,1,float16,float16,0,2.2215733528137207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,40,8,128,1,float16,float16,0,5.181952158610026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,fp8,0,14.614058176676432
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,40,4,128,1,float16,float16,0,2.3364052772521973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,fp8,0,6.708853403727214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,40,8,128,1,float16,float16,0,2.536234696706136
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,fp8,0,1.8411572774251301
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,fp8,0,1.445306619008382
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,fp8,0,1.174026648203532
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,40,40,128,1,float16,float16,0,2.1936747233072915
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,40,2,128,1,float16,float16,0,2.196293354034424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,fp8,0,0.5341493288675944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,40,2,128,1,float16,float16,0,1.1164586544036865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,40,1,128,1,float16,float16,0,1.1009653409322102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,fp8,0,0.6149119933446249
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,fp8,0,0.6700960000356039
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,40,8,128,1,float16,float16,0,1.243882656097412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,fp8,0,0.8686239719390869
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,fp8,0,0.15009599924087524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,40,1,128,1,float16,float16,0,0.5670293172200521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,40,4,128,1,float16,float16,0,1.19596266746521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,40,40,128,1,float16,float16,0,1.084938685099284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,fp8,0,3.506709416707357
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,40,2,128,1,float16,float16,0,0.5841013193130493
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,fp8,0,0.22855999072392783
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,40,4,128,1,float16,float16,0,0.5996853510538737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,fp8,0,0.40220268567403156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,40,1,128,1,float16,float16,0,0.2845226724942525
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,40,40,128,1,float16,float16,0,0.5161226590474447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,fp8,0,0.31402132908503216
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,40,8,128,1,float16,float16,0,0.6499679883321127
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,40,2,128,1,float16,float16,0,0.2906399965286255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,fp8,0,0.822538693745931
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,fp8,0,1.787157376607259
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,fp8,0,0.033674667278925575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,fp8,0,0.04110399881998698
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,40,4,128,1,float16,float16,0,0.3245013356208801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,40,8,128,1,float16,float16,0,0.33555734157562256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,fp8,0,0.11889599760373433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,fp8,0,0.06717333197593689
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,40,1,128,1,float16,float16,0,0.15598400433858237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,fp8,0,0.027589333554108936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,40,2,128,1,float16,float16,0,0.1585919956366221
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,fp8,0,0.022554665803909302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,40,4,128,1,float16,float16,0,0.16196800271670023
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,40,8,128,1,float16,float16,0,0.15804266929626465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,fp8,0,0.04098133246103922
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,40,40,128,1,float16,float16,0,0.2566293279329936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,fp8,0,0.3993813196818034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,40,40,128,1,float16,float16,0,0.09649067123730977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,fp8,0,0.06657599906126659
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,40,1,128,1,float16,float16,0,0.08925867080688477
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,fp8,0,0.08989866574605306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,40,2,128,1,float16,float16,0,0.09173867106437683
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,fp8,0,0.09457066655158997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,40,4,128,1,float16,float16,0,0.09284266829490662
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,fp8,0,0.10312533378601074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,fp8,0,0.2688960035641988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,fp8,0,0.12370666861534119
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,40,8,128,1,float16,float16,0,0.09246399998664856
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,fp8,0,4.637898763020833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,fp8,0,4.707584063212077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,40,1,128,1,float16,float16,0,7.851856231689453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,40,2,128,1,float16,float16,0,8.600773493448893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,fp8,0,5.5185597737630205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,40,1,128,1,float16,float16,0,2.726383845011393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,40,40,128,1,float16,float16,0,7.981514612833659
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,fp8,0,2.166096051534017
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,40,4,128,1,float16,float16,0,9.257743835449219
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,fp8,0,2.340991973876953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,fp8,0,7.192698796590169
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,fp8,0,10.184666951497396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,40,2,128,1,float16,float16,0,2.8801441192626953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,40,8,128,1,float16,float16,0,9.35764249165853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,40,1,128,1,float16,float16,0,1.3579146067301433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,40,8,128,1,float16,float16,0,3.7292426427205405
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,fp8,0,2.7695414225260415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,40,4,128,1,float16,float16,0,3.2765121459960938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,40,2,128,1,float16,float16,0,1.427504062652588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,fp8,0,3.587552070617676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,fp8,0,5.231712023417155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,40,4,128,1,float16,float16,0,1.5822666486104329
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,fp8,0,1.44049072265625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,fp8,0,1.1717066764831543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,40,40,128,1,float16,float16,0,2.1901067097981772
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,fp8,0,1.7794826825459797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,40,8,128,1,float16,float16,0,1.8293280601501465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,fp8,0,0.5337493419647217
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,fp8,0,2.6197813351949057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,40,2,128,1,float16,float16,0,0.7351199785868326
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,40,1,128,1,float16,float16,0,0.6958666642506918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,fp8,0,0.6376266479492188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,fp8,0,0.5631680091222128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,40,4,128,1,float16,float16,0,0.7922186851501465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,40,40,128,1,float16,float16,0,1.011413335800171
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,40,1,128,1,float16,float16,0,0.37273601690928143
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,fp8,0,0.14898133277893066
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,fp8,0,1.0985386371612549
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,40,8,128,1,float16,float16,0,0.9190186659495035
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,40,2,128,1,float16,float16,0,0.3808000087738037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,fp8,0,0.2227999965349833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,40,4,128,1,float16,float16,0,0.4070826768875122
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,fp8,0,0.3071146607398987
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,fp8,0,1.2880053520202637
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,40,40,128,1,float16,float16,0,4.071850776672363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,fp8,0,0.9147946834564209
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,40,40,128,1,float16,float16,0,0.48279468218485516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,fp8,0,0.03323200096686681
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,40,1,128,1,float16,float16,0,0.1704053282737732
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,fp8,0,0.37930134932200116
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,40,8,128,1,float16,float16,0,0.4451253414154053
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,40,4,128,1,float16,float16,0,0.2120479941368103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,fp8,0,0.05938133100668589
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,40,8,128,1,float16,float16,0,0.22842133045196533
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,fp8,0,0.5869280099868774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,fp8,0,0.09584533174832661
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,40,40,128,1,float16,float16,0,0.19618666172027588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,40,2,128,1,float16,float16,0,0.18048532803853354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,fp8,0,0.03875199953715006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,fp8,0,0.022698665658632915
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,40,2,128,1,float16,float16,0,0.09455466270446777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,40,4,128,1,float16,float16,0,0.09888533751169841
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,fp8,0,0.0349386657277743
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,fp8,0,0.2518826723098755
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,40,1,128,1,float16,float16,0,0.09117866555849712
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,fp8,0,0.024362665911515553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,40,8,128,1,float16,float16,0,0.09830933809280396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,fp8,0,0.048453330993652344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,40,1,128,1,float16,float16,0,0.05226666728655497
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,40,40,128,1,float16,float16,0,0.062021334966023765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,40,2,128,1,float16,float16,0,0.053082664807637535
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,fp8,0,0.08949866890907288
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,fp8,0,0.09107200304667155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,40,4,128,1,float16,float16,0,0.055029332637786865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,fp8,0,0.09715732932090759
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,40,8,128,1,float16,float16,0,0.055546666185061135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,40,40,128,1,float16,float16,0,0.0348693331082662
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,fp8,0,0.11003733674685161
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,fp8,0,0.19549866517384848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,40,1,128,1,float16,float16,0,0.030495998760064442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,fp8,0,0.052373334765434265
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,fp8,0,0.0526506652434667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,40,2,128,1,float16,float16,0,0.031114667654037476
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,40,4,128,1,float16,float16,0,0.03190399954716364
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,fp8,0,0.058005332946777344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,40,8,128,1,float16,float16,0,0.031850665807724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,fp8,0,0.060965334375699363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,fp8,0,0.10717333356539409
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,fp8,0,2.317413330078125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,40,1,128,1,float16,float16,0,2.610095977783203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,40,2,128,1,float16,float16,0,2.8772106170654297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,fp8,0,2.5290187199910483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,fp8,0,2.951109250386556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,40,4,128,1,float16,float16,0,3.0666987101236978
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,40,1,128,1,float16,float16,0,1.2019253571828206
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,40,40,128,1,float16,float16,0,4.041200002034505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,fp8,0,1.1511840025583904
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,40,8,128,1,float16,float16,0,3.8827892939249673
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,40,2,128,1,float16,float16,0,1.1934773127237956
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,fp8,0,1.1570613384246826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,fp8,0,4.249749183654785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,40,4,128,1,float16,float16,0,1.4650506973266602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,fp8,0,1.4478559494018555
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,fp8,0,3.6109654108683267
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,40,40,128,1,float16,float16,0,2.029263973236084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,fp8,0,1.798085371653239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,40,8,128,1,float16,float16,0,1.6692585945129395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,fp8,0,2.205440044403076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,fp8,0,0.5344693263371786
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,40,4,128,1,float16,float16,0,0.7195946375528971
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,40,2,128,1,float16,float16,0,0.6320373217264811
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,40,1,128,1,float16,float16,0,0.5838720003763834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,40,40,128,1,float16,float16,0,1.0174667040507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,fp8,0,0.6378666559855143
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,40,8,128,1,float16,float16,0,0.8281599680582682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,40,1,128,1,float16,float16,0,0.290282666683197
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,fp8,0,0.5673706531524658
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,fp8,0,0.1583466629187266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,40,2,128,1,float16,float16,0,0.3112746675809224
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,fp8,0,0.22631466388702393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,fp8,0,1.0368746916453044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,40,8,128,1,float16,float16,0,0.40776534875233966
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,fp8,0,0.9051520029703776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,40,4,128,1,float16,float16,0,0.32530667384465534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,fp8,0,0.3064853350321452
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,40,40,128,1,float16,float16,0,0.5216746727625529
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,fp8,0,0.03257066756486893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,fp8,0,0.038047999143600464
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,40,1,128,1,float16,float16,0,0.11965866883595784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,fp8,0,0.4516799847284953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,40,4,128,1,float16,float16,0,0.15013866623242697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,fp8,0,0.37276268005371094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,40,8,128,1,float16,float16,0,0.17256534099578857
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,fp8,0,0.054586668809254967
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,fp8,0,0.0759200006723404
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,40,40,128,1,float16,float16,0,0.16962132851282755
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,40,1,128,1,float16,float16,0,0.06427200138568878
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,40,2,128,1,float16,float16,0,0.06628266473611195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,fp8,0,0.024341332415739696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,40,4,128,1,float16,float16,0,0.07023466626803081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,40,2,128,1,float16,float16,0,0.12498666842778523
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,fp8,0,0.03173333406448364
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,fp8,0,0.039642666776975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,fp8,0,0.022570667167504627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,fp8,0,0.19088532527287802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,40,40,128,1,float16,float16,0,0.04614399870236715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,fp8,0,0.09006399909655254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,40,8,128,1,float16,float16,0,0.07090133428573608
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,40,1,128,1,float16,float16,0,0.035717333356539406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,40,2,128,1,float16,float16,0,0.036373332142829895
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,fp8,0,0.09070932865142822
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,40,8,128,1,float16,float16,0,0.03835733234882355
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,fp8,0,0.09382399916648865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,40,40,128,1,float16,float16,0,0.026181332767009735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,40,4,128,1,float16,float16,0,0.038405333956082664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,fp8,0,0.16470932960510254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,fp8,0,0.10311466455459595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,40,1,128,1,float16,float16,0,0.02214933435122172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,40,4,128,1,float16,float16,0,0.023749334116776783
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,fp8,0,0.05192000170548757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,40,2,128,1,float16,float16,0,0.02218666672706604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,40,8,128,1,float16,float16,0,0.023333333432674408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,fp8,0,0.05189333359400431
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,fp8,0,0.05388799806435903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,fp8,0,0.08521599570910136
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,40,1,128,1,float16,float16,0,0.016522667060295742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,40,2,128,1,float16,float16,0,0.01639466608564059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,fp8,0,0.033226666351159416
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,fp8,0,0.05797866483529409
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,40,4,128,1,float16,float16,0,0.016629333297411602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,40,40,128,1,float16,float16,0,0.018325333793958027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,fp8,0,0.033402666449546814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,fp8,0,0.03350933392842611
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,40,8,128,1,float16,float16,0,0.016549333930015564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,fp8,0,0.04587733248869578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,fp8,0,0.03730133424202601
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,40,1,128,1,float16,float16,0,1.1710186799367268
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,fp8,0,1.1749546527862549
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,fp8,0,1.1550559997558594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,40,2,128,1,float16,float16,0,1.3470826148986816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,40,4,128,1,float16,float16,0,1.4987786610921223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,fp8,0,1.4637494087219238
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,40,8,128,1,float16,float16,0,1.7433546384175618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,40,40,128,1,float16,float16,0,2.183157285054525
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,40,1,128,1,float16,float16,0,0.6008906761805216
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,fp8,0,1.8062292734781902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,40,2,128,1,float16,float16,0,0.6019786596298218
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,fp8,0,0.5347520112991333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,fp8,0,2.1475680669148765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,fp8,0,0.5679306586583456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,40,8,128,1,float16,float16,0,0.8522133032480875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,40,40,128,1,float16,float16,0,1.0177120367685955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,fp8,0,0.15756799777348837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,fp8,0,0.6607040166854858
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,40,1,128,1,float16,float16,0,0.2972960074742635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,40,4,128,1,float16,float16,0,0.7327520052591959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,fp8,0,0.9419679641723633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,40,4,128,1,float16,float16,0,0.31938666105270386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,fp8,0,0.24871466557184854
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,40,40,128,1,float16,float16,0,0.4867946704228719
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,fp8,0,0.3760853211085002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,fp8,0,0.3049973249435425
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,fp8,0,0.9144426981608073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,fp8,0,0.0329066663980484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,40,1,128,1,float16,float16,0,0.09457600116729736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,40,2,128,1,float16,float16,0,0.282858669757843
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,40,8,128,1,float16,float16,0,0.43731733163197833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,fp8,0,0.03762666632731756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,fp8,0,0.3686986764272054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,fp8,0,0.05470933516820272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,40,2,128,1,float16,float16,0,0.10013332962989807
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,40,1,128,1,float16,float16,0,0.050437331199645996
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,40,8,128,1,float16,float16,0,0.16741865873336792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,fp8,0,0.06725333134333293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,40,2,128,1,float16,float16,0,0.051957334081331887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,fp8,0,0.022469334304332733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,fp8,0,0.024501333634058636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,fp8,0,0.15051199992497763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,fp8,0,0.031530665854612984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,40,40,128,1,float16,float16,0,0.1658453345298767
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,40,4,128,1,float16,float16,0,0.055946667989095054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,40,8,128,1,float16,float16,0,0.056176001826922096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,fp8,0,0.036277333895365395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,40,4,128,1,float16,float16,0,0.12236799796422322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,fp8,0,0.017594666530688603
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,40,2,128,1,float16,float16,0,0.029440000653266907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,40,40,128,1,float16,float16,0,0.0371573343873024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,40,1,128,1,float16,float16,0,0.028714666763941448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,fp8,0,0.018474667022625606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,40,4,128,1,float16,float16,0,0.03166399896144867
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,40,8,128,1,float16,float16,0,0.03181333343187968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,fp8,0,0.021525333325068157
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,40,40,128,1,float16,float16,0,0.02276266614596049
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,40,1,128,1,float16,float16,0,0.01859733338157336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,fp8,0,0.02272533377011617
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,fp8,0,0.0720000018676122
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,40,2,128,1,float16,float16,0,0.01882133384545644
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,40,4,128,1,float16,float16,0,0.01977066695690155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,fp8,0,0.016415999581416447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,40,8,128,1,float16,float16,0,0.01958400011062622
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,fp8,0,0.039018665750821434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,fp8,0,0.016976000120242436
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,40,40,128,1,float16,float16,0,0.015317333241303762
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,40,1,128,1,float16,float16,0,0.01339200014869372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,40,2,128,1,float16,float16,0,0.01350933313369751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,fp8,0,0.013760000467300415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,fp8,0,0.01370666672786077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,40,4,128,1,float16,float16,0,0.013866666704416275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,40,8,128,1,float16,float16,0,0.0138026662170887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,40,40,128,1,float16,float16,0,0.010175999874869982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,fp8,0,0.014032000054915747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,fp8,0,0.025813333690166473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,fp8,0,0.0138026662170887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,40,1,128,1,float16,float16,0,0.009296000003814697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,40,2,128,1,float16,float16,0,0.009237333511312803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,40,4,128,1,float16,float16,0,0.009482666850090027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,40,8,128,1,float16,float16,0,0.009392000113924345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,fp8,0,0.013466666142145792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,fp8,0,0.018858666221300762
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,40,2,128,1,float16,float16,0,0.6171040137608846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,fp8,0,0.6408640146255493
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,fp8,0,0.533957322438558
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,fp8,0,0.6133226553599039
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,40,1,128,1,float16,float16,0,0.5763200124104818
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,40,4,128,1,float16,float16,0,0.6738186677296957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,fp8,0,0.8608960310618082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,40,40,128,1,float16,float16,0,1.0230027039845784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,40,8,128,1,float16,float16,0,0.9112959702809652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,fp8,0,0.15065600474675497
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,40,1,128,1,float16,float16,0,0.27485867341359455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,fp8,0,0.9426986376444498
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,fp8,0,0.21810666720072427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,40,2,128,1,float16,float16,0,0.30807467301686603
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,40,4,128,1,float16,float16,0,0.3218773404757182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,fp8,0,0.3470613161722819
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,40,40,128,1,float16,float16,0,0.517413338025411
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,40,1,128,1,float16,float16,0,0.08804800113042195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,fp8,0,0.3553866545359294
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,fp8,0,0.0337119996547699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,40,2,128,1,float16,float16,0,0.0944106678167979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,40,8,128,1,float16,float16,0,0.4484906593958537
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,fp8,0,0.054511999090512596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,40,4,128,1,float16,float16,0,0.11292800307273865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,fp8,0,0.03770666569471359
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,40,40,128,1,float16,float16,0,0.1518880029519399
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,40,1,128,1,float16,float16,0,0.04789866507053375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,fp8,0,0.3060426712036133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,40,2,128,1,float16,float16,0,0.04935466746489207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,fp8,0,0.022319999833901722
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,40,4,128,1,float16,float16,0,0.05310399830341339
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,fp8,0,0.06563200056552887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,40,8,128,1,float16,float16,0,0.053397332628568016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,fp8,0,0.035962666074434914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,fp8,0,0.024256000916163128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,40,40,128,1,float16,float16,0,0.03457599878311157
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,40,8,128,1,float16,float16,0,0.16526400049527487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,40,1,128,1,float16,float16,0,0.026565333207448322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,40,2,128,1,float16,float16,0,0.027776000400384266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,fp8,0,0.01720533271630605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,fp8,0,0.11487999558448792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,fp8,0,0.02111999938885371
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,40,8,128,1,float16,float16,0,0.030063999195893604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,fp8,0,0.022554665803909302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,fp8,0,0.03163733333349228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,fp8,0,0.01814933369557063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,fp8,0,0.014917333920796713
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,40,1,128,1,float16,float16,0,0.01740266631046931
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,fp8,0,0.05035733183224996
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,fp8,0,0.030426666140556335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,40,2,128,1,float16,float16,0,0.01749333366751671
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,40,4,128,1,float16,float16,0,0.0295413335164388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,40,4,128,1,float16,float16,0,0.018383999665578205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,40,40,128,1,float16,float16,0,0.021210665504137676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,fp8,0,0.016261332978804905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,fp8,0,0.016762666404247284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,fp8,0,0.013605333864688873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,40,40,128,1,float16,float16,0,0.014933332800865173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,40,1,128,1,float16,float16,0,0.01268799975514412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,fp8,0,0.014069333672523499
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,40,8,128,1,float16,float16,0,0.018672000616788864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,40,4,128,1,float16,float16,0,0.013210666676362356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,fp8,0,0.013722666849692663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,fp8,0,0.01912533367673556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,40,8,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,40,2,128,1,float16,float16,0,0.012784000486135483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,40,1,128,1,float16,float16,0,0.008837333569924036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,fp8,0,0.013749333719412485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,40,40,128,1,float16,float16,0,0.00996800015370051
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,40,2,128,1,float16,float16,0,0.008933333059151968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,40,4,128,1,float16,float16,0,0.00897066667675972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,fp8,0,0.013343999783198038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,40,1,128,1,float16,float16,0,0.00847999999920527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,40,2,128,1,float16,float16,0,0.008527999743819237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,40,8,128,1,float16,float16,0,0.009088000282645226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,40,40,128,1,float16,float16,0,0.009152000149091085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,40,4,128,1,float16,float16,0,0.008496000121037165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,fp8,0,0.014725333700577417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,40,8,128,1,float16,float16,0,0.008714666590094566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,fp8,0,0.2099626660346985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,40,1,128,1,float16,float16,0,0.3474666674931844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,fp8,0,0.2872320016225179
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,40,2,128,1,float16,float16,0,0.35235734780629474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,40,4,128,1,float16,float16,0,0.4010133345921834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,fp8,0,0.3351093530654907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,40,8,128,1,float16,float16,0,0.44490134716033936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,40,1,128,1,float16,float16,0,0.1523306667804718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,fp8,0,0.03721066564321518
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,40,2,128,1,float16,float16,0,0.15635200341542563
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,40,40,128,1,float16,float16,0,0.5144960085550944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,fp8,0,0.4217653274536133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,40,4,128,1,float16,float16,0,0.17314666509628296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,fp8,0,0.47275201479593915
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,fp8,0,0.06954666475454967
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,40,8,128,1,float16,float16,0,0.2021013299624125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,fp8,0,0.04452266792456309
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,40,2,128,1,float16,float16,0,0.08133333424727122
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,fp8,0,0.02619733413060506
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,fp8,0,0.20409067471822104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,40,40,128,1,float16,float16,0,0.18881599108378092
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,40,4,128,1,float16,float16,0,0.08444266517957051
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,fp8,0,0.028373333315054577
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,fp8,0,0.10500267148017883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,40,8,128,1,float16,float16,0,0.08529067039489746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,fp8,0,0.03642666588226954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,40,40,128,1,float16,float16,0,0.05138133466243744
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,40,2,128,1,float16,float16,0,0.04433600107828776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,fp8,0,0.08583999673525493
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,40,1,128,1,float16,float16,0,0.04349866509437561
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,fp8,0,0.022074667116006214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,40,1,128,1,float16,float16,0,0.07931200166543324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,fp8,0,0.05118933320045471
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,40,40,128,1,float16,float16,0,0.028463999430338543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,fp8,0,0.025360000630219776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,fp8,0,0.020821332931518555
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,40,4,128,1,float16,float16,0,0.04637866715590159
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,40,8,128,1,float16,float16,0,0.04574400186538696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,fp8,0,0.02756800005833308
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,40,2,128,1,float16,float16,0,0.024293333292007446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,40,4,128,1,float16,float16,0,0.025445332129796345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,fp8,0,0.018645333747069042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,fp8,0,0.01939733326435089
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,40,1,128,1,float16,float16,0,0.024234667420387268
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,40,40,128,1,float16,float16,0,0.018272000054518383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,fp8,0,0.020234666764736176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,40,1,128,1,float16,float16,0,0.016186666985352833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,40,8,128,1,float16,float16,0,0.02532800038655599
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,fp8,0,0.02088533341884613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,fp8,0,0.0469813346862793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,fp8,0,0.018021332720915478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,40,2,128,1,float16,float16,0,0.016208000481128693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,40,8,128,1,float16,float16,0,0.016399999459584553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,fp8,0,0.018197332819302876
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,40,4,128,1,float16,float16,0,0.016442666451136272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,40,40,128,1,float16,float16,0,0.012810666114091873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,fp8,0,0.0174346665541331
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,40,1,128,1,float16,float16,0,0.012058666596810022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,fp8,0,0.033413333197434746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,fp8,0,0.01782400036851565
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,fp8,0,0.017418666432301205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,40,2,128,1,float16,float16,0,0.01198400060335795
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,fp8,0,0.019637333850065868
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,40,4,128,1,float16,float16,0,0.0120319997270902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,40,8,128,1,float16,float16,0,0.012005332857370377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,fp8,0,0.01754133279124896
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,fp8,0,0.01739199956258138
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,fp8,0,0.016864000509182613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,40,1,128,1,float16,float16,0,0.008378666515151659
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,fp8,0,0.017301333447297413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,40,40,128,1,float16,float16,0,0.008853333070874214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,fp8,0,0.016783999900023144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,40,4,128,1,float16,float16,0,0.008320000022649765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,40,2,128,1,float16,float16,0,0.008277333031098047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,40,8,128,1,float16,float16,0,0.008496000121037165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,fp8,0,0.017194667210181553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,fp8,0,0.01877333347996076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,40,2,128,1,float16,float16,0,0.00808533343176047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,fp8,0,0.017445333302021027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,40,1,128,1,float16,float16,0,0.008063999935984612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,40,40,128,1,float16,float16,0,0.00855466661353906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,fp8,0,0.016538667182127636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,40,8,128,1,float16,float16,0,0.008197333042820295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,40,4,128,1,float16,float16,0,0.008229333286484083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,fp8,0,0.0179626668492953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,40,2,128,1,float16,float16,0,0.2861599922180176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,fp8,0,0.09668266773223877
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,fp8,0,0.039520000418027244
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,40,1,128,1,float16,float16,0,0.28149867057800293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,fp8,0,0.05706666906674703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,40,4,128,1,float16,float16,0,0.3109760085741679
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,40,8,128,1,float16,float16,0,0.33284799257914227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,fp8,0,0.17457600434621176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,40,40,128,1,float16,float16,0,0.24106132984161377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,fp8,0,0.026613332331180573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,40,1,128,1,float16,float16,0,0.14249066511789957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,fp8,0,0.02994133283694585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,fp8,0,0.2885226607322693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,40,2,128,1,float16,float16,0,0.14432000120480856
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,40,4,128,1,float16,float16,0,0.14840533336003622
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,40,40,128,1,float16,float16,0,0.08252266546090443
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,40,8,128,1,float16,float16,0,0.1483626663684845
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,40,1,128,1,float16,float16,0,0.07431999842325847
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,fp8,0,0.07520000139872234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,fp8,0,0.05017066498597463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,fp8,0,0.12940800189971924
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,fp8,0,0.022367998957633972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,fp8,0,0.02089600016673406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,40,2,128,1,float16,float16,0,0.07524266839027405
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,40,4,128,1,float16,float16,0,0.07691200077533722
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,40,40,128,1,float16,float16,0,0.04474666714668274
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,40,8,128,1,float16,float16,0,0.07724800209204356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,fp8,0,0.02701333413521449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,40,1,128,1,float16,float16,0,0.04116799930731455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,fp8,0,0.018538666268189747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,40,4,128,1,float16,float16,0,0.04186666508515676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,fp8,0,0.018592000007629395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,40,2,128,1,float16,float16,0,0.04092800120512644
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,fp8,0,0.041109333435694374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,fp8,0,0.0717439999183019
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,40,8,128,1,float16,float16,0,0.042122667034467064
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,fp8,0,0.02075200031201045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,40,40,128,1,float16,float16,0,0.0249439999461174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,fp8,0,0.022597332795461018
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,40,1,128,1,float16,float16,0,0.023045333723227184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,fp8,0,0.018122666825850803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,fp8,0,0.040565334260463715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,fp8,0,0.01747200017174085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,fp8,0,0.017360000560681026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,40,2,128,1,float16,float16,0,0.023013333479563396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,40,4,128,1,float16,float16,0,0.023205332458019257
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,40,8,128,1,float16,float16,0,0.023285334308942158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,40,1,128,1,float16,float16,0,0.015429332852363586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,40,40,128,1,float16,float16,0,0.01637866720557213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,fp8,0,0.018181333939234417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,fp8,0,0.017573333034912746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,40,2,128,1,float16,float16,0,0.01544533297419548
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,fp8,0,0.017514667163292568
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,40,4,128,1,float16,float16,0,0.01553600033124288
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,40,8,128,1,float16,float16,0,0.015439999600251516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,fp8,0,0.031018666923046112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,fp8,0,0.01701333373785019
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,fp8,0,0.016773333152135212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,40,40,128,1,float16,float16,0,0.012063999970753988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,40,1,128,1,float16,float16,0,0.011525332927703857
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,fp8,0,0.017258666455745697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,40,4,128,1,float16,float16,0,0.01157333329319954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,40,8,128,1,float16,float16,0,0.011637333780527115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,fp8,0,0.01730666682124138
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,40,2,128,1,float16,float16,0,0.011424000064531961
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,fp8,0,0.016480000068744022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,40,40,128,1,float16,float16,0,0.00847999999920527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,40,1,128,1,float16,float16,0,0.008154666672150293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,fp8,0,0.0169813334941864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,40,2,128,1,float16,float16,0,0.007957333077987036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,40,8,128,1,float16,float16,0,0.008176000167926153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,40,4,128,1,float16,float16,0,0.008282666405042013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,40,40,128,1,float16,float16,0,0.008485333373149237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,fp8,0,0.018330667167901993
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,fp8,0,0.016303999970356624
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,40,4,128,1,float16,float16,0,0.008031999692320824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,40,1,128,1,float16,float16,0,0.00786666696270307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,40,2,128,1,float16,float16,0,0.007957333077987036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,fp8,0,0.016821333517630894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,fp8,0,0.01699200024207433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,40,8,128,1,float16,float16,0,0.008058666562040647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,fp8,0,0.017162666966517765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,fp8,0,0.01785600061217944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,40,2,128,1,float16,fp8,0,0.024304000039895374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,40,1,128,1,float16,float16,0,0.014389333625634512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,40,4,128,1,float16,float16,0,0.0421013335386912
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,40,1,128,1,float16,fp8,0,0.018016000588734944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,40,8,128,1,float16,float16,0,0.06676266590754192
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,40,2,128,1,float16,float16,0,0.028207999964555103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,0,0.03587199995915095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,0,0.05537599821885427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,40,40,128,1,float16,float16,0,0.1297920048236847
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,40,1,128,1,float16,float16,0,0.009914666414260864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,40,1,128,1,float16,fp8,0,0.013914667069911957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,40,2,128,1,float16,float16,0,0.01851733277241389
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,40,2,128,1,float16,fp8,0,0.01616000011563301
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,40,4,128,1,float16,float16,0,0.026447998980681103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,0,0.0220320001244545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,fp8,0,0.11377066373825073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,40,40,128,1,float16,float16,0,0.06871999800205231
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,0,0.03375466664632162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,40,1,128,1,float16,fp8,0,0.01267733300725619
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,40,1,128,1,float16,float16,0,0.009653333574533463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,40,8,128,1,float16,float16,0,0.03937600056330363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,fp8,0,0.06369066735108693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,40,2,128,1,float16,float16,0,0.013631999492645264
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,40,2,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,40,4,128,1,float16,float16,0,0.01747200017174085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,40,40,128,1,float16,float16,0,0.03850133220354716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,0,0.014933332800865173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,40,8,128,1,float16,float16,0,0.02456533412138621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,0,0.021151999632517498
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,40,1,128,1,float16,float16,0,0.009322666873534521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,40,1,128,1,float16,fp8,0,0.011989332735538483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,40,2,128,1,float16,float16,0,0.013269333789745966
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,40,2,128,1,float16,fp8,0,0.012181332955757776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,40,4,128,1,float16,float16,0,0.013301332791646322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,fp8,0,0.03849066545565923
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,40,40,128,1,float16,float16,0,0.02197866638501485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,40,8,128,1,float16,float16,0,0.016879999389251072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,40,1,128,1,float16,float16,0,0.009109333157539368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,0,0.014208000153303146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,0,0.01228800043463707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,fp8,0,0.025536000728607178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,40,2,128,1,float16,float16,0,0.012821332861979803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,40,4,128,1,float16,float16,0,0.012901333471139273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,0,0.011546666423479715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,0,0.013669333110253016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,40,2,128,1,float16,fp8,0,0.011493333925803503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,40,8,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,40,1,128,1,float16,fp8,0,0.011711999773979187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,40,40,128,1,float16,float16,0,0.01504533365368843
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,40,1,128,1,float16,float16,0,0.008938666433095932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,40,1,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,40,2,128,1,float16,float16,0,0.012736000120639801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,0,0.011509332805871964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,40,4,128,1,float16,float16,0,0.012618667135636011
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,40,2,128,1,float16,fp8,0,0.011370666325092316
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,40,8,128,1,float16,float16,0,0.01259200026591619
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,0,0.01340266689658165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,40,2,128,1,float16,float16,0,0.012367999802033106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,40,1,128,1,float16,float16,0,0.008736000085870424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,40,40,128,1,float16,float16,0,0.011514666179815928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,40,2,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,fp8,0,0.020608000457286835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,40,1,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,40,4,128,1,float16,float16,0,0.012442667037248611
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,fp8,0,0.01640533283352852
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,40,1,128,1,float16,float16,0,0.0086666668454806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,40,8,128,1,float16,float16,0,0.012549333274364471
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,0,0.013183999806642532
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,0,0.011440000186363855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,40,1,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,40,2,128,1,float16,float16,0,0.011962667107582092
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,40,4,128,1,float16,float16,0,0.012138667205969492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,40,40,128,1,float16,float16,0,0.008245333408315977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,40,2,128,1,float16,fp8,0,0.011061333119869232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,40,8,128,1,float16,float16,0,0.012149333953857422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,0,0.011215999722480774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,40,40,128,1,float16,float16,0,0.008197333042820295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,fp8,0,0.015674666812022526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,40,1,128,1,float16,float16,0,0.008736000085870424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,40,1,128,1,float16,fp8,0,0.010879999647537867
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,40,2,128,1,float16,fp8,0,0.010832000523805618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,40,2,128,1,float16,float16,0,0.00855466661353906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,40,4,128,1,float16,float16,0,0.008623999853928884
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,40,8,128,1,float16,float16,0,0.008789333204428354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,0,0.011338666081428528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,0,0.011114666859308878
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,fp8,0,0.014533333480358124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,fp8,0,4.112613360087077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,fp8,0,5.539877573649089
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,fp8,0,10.039594650268555
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,fp8,0,31.71612294514974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,fp8,0,2.0726879437764487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,32,1,128,1,float16,float16,0,51.38232930501302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,32,32,128,1,float16,float16,0,55.42680358886719
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,fp8,0,2.6305920282999673
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,fp8,0,4.9595521291097
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,32,2,128,1,float16,float16,0,52.60973612467448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,32,1,128,1,float16,float16,0,108.94706217447917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,32,4,128,1,float16,float16,0,110.4012959798177
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,32,2,128,1,float16,float16,0,113.3998514811198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,fp8,0,15.070906321207682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,32,8,128,1,float16,float16,0,112.41707356770833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,fp8,0,1.0521653493245442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,fp8,0,1.3862667083740234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,32,4,128,1,float16,float16,0,56.00137837727865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,fp8,0,117.4430643717448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,fp8,0,2.655989329020182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,32,8,128,1,float16,float16,0,52.18292236328125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,32,32,128,1,float16,float16,0,27.214630126953125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,fp8,0,7.164757410685222
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,32,1,128,1,float16,float16,0,26.9681396484375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,fp8,0,0.5131413141886393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,32,2,128,1,float16,float16,0,26.414886474609375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,fp8,0,0.7258986632029215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,32,32,128,1,float16,float16,0,13.965807596842447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,32,4,128,1,float16,float16,0,25.864473978678387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,fp8,0,1.4118132591247559
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,32,1,128,1,float16,float16,0,12.96713129679362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,fp8,0,3.6836318969726562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,32,2,128,1,float16,float16,0,13.54465103149414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,32,8,128,1,float16,float16,0,27.414649963378906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,32,4,128,1,float16,float16,0,13.413888295491537
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,fp8,0,2.9240105946858725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,32,8,128,1,float16,float16,0,12.867168426513672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,fp8,0,3.6623306274414062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,fp8,0,28.505940755208332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,fp8,0,6.356304168701172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,fp8,0,58.04176330566406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,fp8,0,19.605215708414715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,fp8,0,1.4393332799275715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,32,32,128,1,float16,float16,0,32.56057484944662
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,32,1,128,1,float16,float16,0,29.62054951985677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,fp8,0,1.9325332641601562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,fp8,0,3.2945919036865234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,32,1,128,1,float16,float16,0,63.78983052571615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,32,2,128,1,float16,float16,0,29.9945805867513
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,32,2,128,1,float16,float16,0,62.09130859375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,32,4,128,1,float16,float16,0,63.05709330240885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,fp8,0,8.889424006144205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,32,8,128,1,float16,float16,0,63.12976582845052
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,fp8,0,0.7718986670176188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,fp8,0,1.0236480236053467
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,32,4,128,1,float16,float16,0,29.929776509602863
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,32,32,128,1,float16,float16,0,15.81699244181315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,fp8,0,1.6488265991210938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,fp8,0,65.31455993652344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,32,1,128,1,float16,float16,0,15.908826192220053
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,fp8,0,4.155920028686523
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,32,8,128,1,float16,float16,0,29.685755411783855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,32,2,128,1,float16,float16,0,16.1256103515625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,fp8,0,0.3934026559193929
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,fp8,0,0.5478880008061727
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,32,4,128,1,float16,float16,0,15.392869313557943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,32,32,128,1,float16,float16,0,7.924997329711914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,fp8,0,0.9286346435546875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,32,1,128,1,float16,float16,0,7.1057173411051435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,fp8,0,2.1793600718180337
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,32,8,128,1,float16,float16,0,15.959376017252604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,32,2,128,1,float16,float16,0,7.097818374633789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,32,4,128,1,float16,float16,0,6.799034754435222
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,fp8,0,32.86302947998047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,fp8,0,2.3494933446248374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,32,8,128,1,float16,float16,0,7.5135148366292315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,fp8,0,2.9513813654581704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,fp8,0,16.798506418863933
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,fp8,0,4.996890703837077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,fp8,0,12.841205596923828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,fp8,0,1.1817653179168701
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,32,32,128,1,float16,float16,0,22.915242513020832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,32,1,128,1,float16,float16,0,21.276357014973957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,fp8,0,1.5036746660868328
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,fp8,0,2.5302346547444663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,32,1,128,1,float16,float16,0,42.663195292154946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,32,2,128,1,float16,float16,0,21.327242533365887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,32,2,128,1,float16,float16,0,43.46239217122396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,32,4,128,1,float16,float16,0,42.86982727050781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,32,8,128,1,float16,float16,0,43.182657877604164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,fp8,0,0.6325493256251017
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,fp8,0,6.346768061319987
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,fp8,0,0.7998720010121664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,32,4,128,1,float16,float16,0,21.88597361246745
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,32,32,128,1,float16,float16,0,10.959264119466146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,fp8,0,47.937723795572914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,fp8,0,1.3750346501668294
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,32,8,128,1,float16,float16,0,21.775652567545574
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,32,1,128,1,float16,float16,0,10.213040033976236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,32,2,128,1,float16,float16,0,10.005834579467773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,fp8,0,3.345088005065918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,fp8,0,0.21023466189702353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,32,32,128,1,float16,float16,0,5.0032533009847
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,fp8,0,0.405839999516805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,32,1,128,1,float16,float16,0,4.754661242167155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,32,4,128,1,float16,float16,0,9.51793098449707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,fp8,0,0.664901336034139
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,32,2,128,1,float16,float16,0,4.985109329223633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,fp8,0,1.829909324645996
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,32,8,128,1,float16,float16,0,10.736485799153646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,32,4,128,1,float16,float16,0,4.822479883829753
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,32,8,128,1,float16,float16,0,4.491706530253093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,fp8,0,22.538441975911457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,fp8,0,11.265764872233072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,fp8,0,3.693536122639974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,fp8,0,4.689557393391927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,fp8,0,6.939872105916341
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,fp8,0,19.152100880940754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,fp8,0,1.8573493957519531
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,32,32,128,1,float16,float16,0,30.65820821126302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,32,1,128,1,float16,float16,0,26.994590759277344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,fp8,0,2.435856024424235
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,fp8,0,3.691856066385905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,32,2,128,1,float16,float16,0,28.983993530273438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,32,1,128,1,float16,float16,0,56.84179178873698
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,32,2,128,1,float16,float16,0,57.56315612792969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,32,4,128,1,float16,float16,0,56.98090108235677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,32,8,128,1,float16,float16,0,58.26917012532552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,fp8,0,0.9299093087514242
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,fp8,0,8.94820785522461
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,32,4,128,1,float16,float16,0,27.966751098632812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,fp8,0,1.1425600051879883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,fp8,0,60.60553487141927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,32,32,128,1,float16,float16,0,15.244234720865885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,32,8,128,1,float16,float16,0,28.277252197265625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,fp8,0,1.911802609761556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,32,1,128,1,float16,float16,0,14.420714060465494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,32,2,128,1,float16,float16,0,12.813925425211588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,fp8,0,4.338869412740071
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,fp8,0,0.46342400709788006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,32,4,128,1,float16,float16,0,14.150068918863932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,32,32,128,1,float16,float16,0,6.721242904663086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,32,1,128,1,float16,float16,0,6.209168116251628
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,fp8,0,0.6071039835611979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,fp8,0,0.9637546539306641
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,32,2,128,1,float16,float16,0,6.645477294921875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,32,8,128,1,float16,float16,0,14.736155192057291
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,fp8,0,2.2321012814839682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,32,32,128,1,float16,float16,0,3.1148961385091147
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,32,4,128,1,float16,float16,0,6.576047897338867
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,fp8,0,0.07876800000667572
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,fp8,0,30.91302490234375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,32,8,128,1,float16,float16,0,6.688549041748047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,fp8,0,0.22362132867177328
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,fp8,0,14.97262446085612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,32,1,128,1,float16,float16,0,2.80184014638265
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,32,2,128,1,float16,float16,0,2.9185654322306314
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,fp8,0,0.5215466817220052
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,fp8,0,1.2075573603312175
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,32,4,128,1,float16,float16,0,3.0299412409464517
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,32,8,128,1,float16,float16,0,3.0244267781575522
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,fp8,0,7.326010386149089
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,fp8,0,2.910640080769857
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,fp8,0,3.4223626454671225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,fp8,0,5.066122690836589
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,fp8,0,10.61404800415039
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,fp8,0,1.46015469233195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,32,32,128,1,float16,float16,0,17.799002329508465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,32,1,128,1,float16,float16,0,16.196431477864582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,fp8,0,1.7174506187438965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,fp8,0,2.562335968017578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,32,2,128,1,float16,float16,0,31.229604085286457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,32,1,128,1,float16,float16,0,32.43476359049479
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,32,4,128,1,float16,float16,0,32.40228271484375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,32,2,128,1,float16,float16,0,16.787818908691406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,32,8,128,1,float16,float16,0,32.156837463378906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,fp8,0,0.7358986536661783
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,fp8,0,5.242272059122722
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,fp8,0,0.8171040217081705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,32,32,128,1,float16,float16,0,8.471343994140625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,32,4,128,1,float16,float16,0,15.9793701171875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,fp8,0,35.346171061197914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,32,1,128,1,float16,float16,0,7.650218963623047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,fp8,0,1.29257067044576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,32,2,128,1,float16,float16,0,7.14515749613444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,32,4,128,1,float16,float16,0,6.321733474731445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,fp8,0,2.782928148905436
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,fp8,0,0.35315199693044025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,32,32,128,1,float16,float16,0,3.9542932510375977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,32,8,128,1,float16,float16,0,16.85268783569336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,fp8,0,0.44386132558186847
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,32,1,128,1,float16,float16,0,3.266789436340332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,32,8,128,1,float16,float16,0,7.755237579345703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,fp8,0,0.65283731619517
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,32,2,128,1,float16,float16,0,3.038384119669596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,fp8,0,1.3372534116109211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,fp8,0,18.129141489664715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,32,4,128,1,float16,float16,0,3.159210522969564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,32,8,128,1,float16,float16,0,3.2991838455200195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,fp8,0,0.05968533456325531
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,32,1,128,1,float16,float16,0,1.562405268351237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,32,32,128,1,float16,float16,0,2.0060853958129883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,fp8,0,0.12598400314648947
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,fp8,0,8.808757146199545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,fp8,0,0.24476800362269083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,32,2,128,1,float16,float16,0,1.7173066139221191
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,32,4,128,1,float16,float16,0,1.6384159723917644
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,fp8,0,0.7490399678548177
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,32,8,128,1,float16,float16,0,1.605413277943929
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,fp8,0,3.723087946573893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,fp8,0,4.320133209228516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,fp8,0,3.818197250366211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,fp8,0,5.719173431396484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,fp8,0,10.577615737915039
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,fp8,0,1.7690773010253906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,32,1,128,1,float16,float16,0,15.133684794108072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,32,32,128,1,float16,float16,0,17.26361592610677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,fp8,0,2.0810720125834146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,32,2,128,1,float16,float16,0,30.647557576497395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,32,4,128,1,float16,float16,0,31.532554626464844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,fp8,0,2.872624079386393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,32,1,128,1,float16,float16,0,31.150431315104168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,32,2,128,1,float16,float16,0,15.108570098876953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,32,8,128,1,float16,float16,0,31.989471435546875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,fp8,0,0.9670399824778239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,fp8,0,5.428053538004558
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,fp8,0,1.0217066605885823
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,32,4,128,1,float16,float16,0,15.231829325358072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,32,32,128,1,float16,float16,0,8.181573232014975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,32,1,128,1,float16,float16,0,6.515685399373372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,fp8,0,1.6308800379435222
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,32,2,128,1,float16,float16,0,5.800490697224935
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,fp8,0,32.56798807779948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,32,8,128,1,float16,float16,0,14.748575846354166
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,fp8,0,2.929429372151693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,32,4,128,1,float16,float16,0,6.862656275431315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,fp8,0,0.46751999855041504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,32,32,128,1,float16,float16,0,3.7031145095825195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,32,8,128,1,float16,float16,0,7.269903818766276
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,fp8,0,0.5103146632512411
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,32,1,128,1,float16,float16,0,3.0084959665934243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,fp8,0,0.7790773709615072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,32,2,128,1,float16,float16,0,3.2841758728027344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,fp8,0,16.672613779703777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,32,4,128,1,float16,float16,0,3.045530637105306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,32,8,128,1,float16,float16,0,2.9931891759236655
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,fp8,0,1.3874613444010417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,32,32,128,1,float16,float16,0,1.883296012878418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,fp8,0,0.07702933251857758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,fp8,0,7.756725311279297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,fp8,0,0.13742933670679727
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,32,1,128,1,float16,float16,0,1.5318560600280762
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,fp8,0,0.37600000699361164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,32,4,128,1,float16,float16,0,1.4709973335266113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,32,2,128,1,float16,float16,0,1.5179680188496907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,fp8,0,0.6891466776529948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,fp8,0,0.043882668018341064
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,fp8,0,3.585199991861979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,32,8,128,1,float16,float16,0,1.5899465878804524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,32,32,128,1,float16,float16,0,0.9405600229899088
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,fp8,0,0.06504000226656596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,32,1,128,1,float16,float16,0,0.7424106597900391
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,32,4,128,1,float16,float16,0,0.8592639764149984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,fp8,0,0.1395840048789978
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,32,2,128,1,float16,float16,0,0.7458879947662354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,fp8,0,0.3594133456548055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,32,8,128,1,float16,float16,0,0.7918186982472738
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,fp8,0,1.778528054555257
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,fp8,0,2.651573340098063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,fp8,0,3.0145492553710938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,fp8,0,4.194064140319824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,fp8,0,6.881807963053386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,32,32,128,1,float16,float16,0,8.966501235961914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,32,1,128,1,float16,float16,0,7.862927754720052
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,32,2,128,1,float16,float16,0,17.482842763264973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,32,1,128,1,float16,float16,0,18.05251185099284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,32,4,128,1,float16,float16,0,18.16427230834961
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,32,8,128,1,float16,float16,0,17.93222427368164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,fp8,0,1.4160906473795574
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,fp8,0,1.560805320739746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,fp8,0,2.0388693809509277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,fp8,0,3.6291465759277344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,32,2,128,1,float16,float16,0,7.822778701782227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,32,1,128,1,float16,float16,0,3.184453328450521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,fp8,0,18.828975677490234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,32,4,128,1,float16,float16,0,8.505205154418945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,fp8,0,0.7073973019917806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,32,32,128,1,float16,float16,0,4.6770985921223955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,fp8,0,0.7511253356933594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,32,8,128,1,float16,float16,0,8.88211186726888
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,fp8,0,1.083957354227702
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,32,2,128,1,float16,float16,0,3.4823786417643228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,fp8,0,1.7876747449239094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,fp8,0,8.661104202270508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,32,32,128,1,float16,float16,0,2.3331947326660156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,fp8,0,0.3487733205159505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,32,1,128,1,float16,float16,0,1.6315946578979492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,32,8,128,1,float16,float16,0,3.448058764139811
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,32,4,128,1,float16,float16,0,3.315946578979492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,fp8,0,0.3922719955444336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,fp8,0,0.8446719646453857
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,fp8,0,0.5006719827651978
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,32,2,128,1,float16,float16,0,1.787765343983968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,32,4,128,1,float16,float16,0,1.7441120147705078
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,fp8,0,0.04711466530958811
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,32,8,128,1,float16,float16,0,1.8167200088500977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,32,32,128,1,float16,float16,0,1.1500319639841716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,fp8,0,4.20413335164388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,32,1,128,1,float16,float16,0,0.872757355372111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,fp8,0,0.08162133395671844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,fp8,0,0.16088533401489258
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,32,4,128,1,float16,float16,0,0.8795519669850668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,32,2,128,1,float16,float16,0,0.9107306798299154
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,fp8,0,0.45100800196329754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,fp8,0,2.0909600257873535
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,32,32,128,1,float16,float16,0,0.6007999976476034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,32,8,128,1,float16,float16,0,0.8848586877187093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,fp8,0,0.03270400067170461
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,32,1,128,1,float16,float16,0,0.44306135177612305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,32,2,128,1,float16,float16,0,0.4647626479466756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,fp8,0,0.05017066498597463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,fp8,0,0.10196800033251445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,32,4,128,1,float16,float16,0,0.48021864891052246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,fp8,0,0.20881599187850952
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,32,8,128,1,float16,float16,0,0.4578666687011719
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,fp8,0,1.0907786687215169
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,fp8,0,4.931376139322917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,fp8,0,3.6885814666748047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,fp8,0,3.9302988052368164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,fp8,0,8.116277058919271
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,fp8,0,1.884453296661377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,32,1,128,1,float16,float16,0,6.875359853108724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,32,4,128,1,float16,float16,0,18.127029418945312
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,32,32,128,1,float16,float16,0,9.328522364298502
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,32,8,128,1,float16,float16,0,17.467370351155598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,32,1,128,1,float16,float16,0,17.025909423828125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,32,2,128,1,float16,float16,0,17.539690653483074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,fp8,0,1.9837172826131184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,fp8,0,2.4854186375935874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,32,2,128,1,float16,float16,0,7.606874465942383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,32,4,128,1,float16,float16,0,6.40338134765625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,fp8,0,4.01200008392334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,32,32,128,1,float16,float16,0,4.719701449076335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,fp8,0,18.53770701090495
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,32,8,128,1,float16,float16,0,7.1541703542073565
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,fp8,0,0.9467413425445557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,32,1,128,1,float16,float16,0,3.028890609741211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,fp8,0,1.0351253350575764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,fp8,0,1.30294402440389
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,32,2,128,1,float16,float16,0,3.011232058207194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,fp8,0,9.114367802937826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,fp8,0,1.9903093973795574
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,32,4,128,1,float16,float16,0,3.2373278935750327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,32,8,128,1,float16,float16,0,3.385845184326172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,fp8,0,0.46215999126434326
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,fp8,0,0.5034720102945963
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,32,1,128,1,float16,float16,0,1.6380213101704915
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,32,2,128,1,float16,float16,0,1.5966453552246094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,32,4,128,1,float16,float16,0,1.6101706822713215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,fp8,0,0.6309173504511515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,32,32,128,1,float16,float16,0,2.4857385953267417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,fp8,0,0.06714133421579997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,32,1,128,1,float16,float16,0,0.8247573375701904
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,fp8,0,4.145296096801758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,32,32,128,1,float16,float16,0,1.172048012415568
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,fp8,0,0.1039466659228007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,fp8,0,1.0212000211079915
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,32,4,128,1,float16,float16,0,0.816378672917684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,32,2,128,1,float16,float16,0,0.8252159754435221
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,fp8,0,0.2916746735572815
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,32,8,128,1,float16,float16,0,1.7960906028747559
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,32,8,128,1,float16,float16,0,0.8553067048390707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,fp8,0,0.4643253485361735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,fp8,0,0.0364479993780454
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,32,1,128,1,float16,float16,0,0.4070826768875122
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,fp8,0,0.04631466666857401
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,fp8,0,2.156757354736328
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,32,32,128,1,float16,float16,0,0.6090720097223917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,32,4,128,1,float16,float16,0,0.43247465292612713
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,fp8,0,0.08961066603660583
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,fp8,0,1.006709337234497
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,fp8,0,0.19801600774129233
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,32,2,128,1,float16,float16,0,0.4115519920984904
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,32,8,128,1,float16,float16,0,0.4457333485285441
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,32,32,128,1,float16,float16,0,0.2597386638323466
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,32,1,128,1,float16,float16,0,0.23298666874567667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,fp8,0,0.02535466601451238
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,fp8,0,0.034346667428811394
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,32,2,128,1,float16,float16,0,0.23084266980489096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,fp8,0,0.05288533369700114
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,32,8,128,1,float16,float16,0,0.23772799968719482
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,32,4,128,1,float16,float16,0,0.2334293325742086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,fp8,0,0.4994293451309204
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,fp8,0,0.11218667030334473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,fp8,0,2.643455982208252
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,fp8,0,3.545098622639974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,fp8,0,5.499327977498372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,fp8,0,3.1347678502400718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,32,1,128,1,float16,float16,0,9.237695693969727
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,32,4,128,1,float16,float16,0,10.112869262695312
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,32,8,128,1,float16,float16,0,10.042240142822266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,32,32,128,1,float16,float16,0,6.095647811889648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,32,1,128,1,float16,float16,0,3.603941281636556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,32,2,128,1,float16,float16,0,8.130704243977865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,fp8,0,1.8961226145426433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,fp8,0,1.4643467267354329
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,32,4,128,1,float16,float16,0,4.222933451334636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,32,2,128,1,float16,float16,0,4.25981871287028
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,fp8,0,1.4204427401224773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,32,8,128,1,float16,float16,0,4.337765375773112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,fp8,0,2.9472427368164062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,fp8,0,11.01751963297526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,32,1,128,1,float16,float16,0,1.9445120493570964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,32,32,128,1,float16,float16,0,3.1787681579589844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,fp8,0,0.729861338933309
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,fp8,0,0.6576213439305624
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,32,2,128,1,float16,float16,0,1.9147574106852214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,fp8,0,0.9434613386789957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,32,4,128,1,float16,float16,0,1.902901331583659
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,32,8,128,1,float16,float16,0,2.1748693784077964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,fp8,0,1.422693411509196
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,fp8,0,0.3366560141245524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,32,2,128,1,float16,float16,0,0.9384053548177084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,fp8,0,0.34434131781260174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,32,1,128,1,float16,float16,0,0.9319199721018473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,fp8,0,5.193157196044922
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,32,32,128,1,float16,float16,0,1.5271573066711426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,32,4,128,1,float16,float16,0,1.014367977778117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,fp8,0,2.726853370666504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,fp8,0,0.4316106637318929
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,32,8,128,1,float16,float16,0,1.0189173221588135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,32,32,128,1,float16,float16,0,0.7662773132324219
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,fp8,0,0.6305546760559082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,fp8,0,0.044154668847719826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,32,1,128,1,float16,float16,0,0.4826720158259074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,32,4,128,1,float16,float16,0,0.5133440097173055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,fp8,0,0.114656001329422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,fp8,0,0.06147199869155884
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,fp8,0,1.2328373591105144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,32,2,128,1,float16,float16,0,0.49584531784057617
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,32,32,128,1,float16,float16,0,0.37861867745717365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,32,8,128,1,float16,float16,0,0.5322986841201782
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,fp8,0,0.02959999938805898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,fp8,0,0.03517866631348928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,fp8,0,0.30509332815806073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,32,1,128,1,float16,float16,0,0.24679466088612875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,32,2,128,1,float16,float16,0,0.25099732478459674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,fp8,0,0.07128533224264781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,fp8,0,0.6244320074717203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,32,4,128,1,float16,float16,0,0.25758934020996094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,fp8,0,0.02162133405605952
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,32,8,128,1,float16,float16,0,0.2606613238652547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,32,32,128,1,float16,float16,0,0.1523306667804718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,32,1,128,1,float16,float16,0,0.14869333306948343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,fp8,0,0.13152000308036804
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,32,2,128,1,float16,float16,0,0.1495039959748586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,fp8,0,0.027077332139015198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,32,4,128,1,float16,float16,0,0.15617066621780396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,fp8,0,0.042080000042915344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,32,8,128,1,float16,float16,0,0.15412799517313638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,fp8,0,0.08686400453249614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,fp8,0,0.3248266577720642
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,fp8,0,3.5277226765950522
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,fp8,0,3.901621182759603
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,32,1,128,1,float16,float16,0,7.134511947631836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,32,8,128,1,float16,float16,0,10.093429565429688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,32,2,128,1,float16,float16,0,7.885109583536784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,fp8,0,6.498453140258789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,fp8,0,1.8904266357421875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,fp8,0,4.690906524658203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,32,1,128,1,float16,float16,0,3.548826535542806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,32,4,128,1,float16,float16,0,10.512389500935873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,32,2,128,1,float16,float16,0,3.606821378072103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,fp8,0,1.933743953704834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,fp8,0,11.451403299967447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,fp8,0,2.3535946210225425
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,32,8,128,1,float16,float16,0,4.242831865946452
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,32,4,128,1,float16,float16,0,3.9513492584228516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,32,32,128,1,float16,float16,0,6.739370981852214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,32,32,128,1,float16,float16,0,3.3759679794311523
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,32,1,128,1,float16,float16,0,1.7621973355611165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,fp8,0,3.305743853251139
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,fp8,0,5.615519841512044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,32,2,128,1,float16,float16,0,1.7853813171386719
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,fp8,0,0.8853493531545004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,fp8,0,0.9606719811757406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,fp8,0,1.6488000551859539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,fp8,0,1.2356639703114827
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,32,8,128,1,float16,float16,0,2.12937593460083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,fp8,0,0.42740265528361004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,32,32,128,1,float16,float16,0,1.6877172787984211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,32,2,128,1,float16,float16,0,0.9836479822794596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,32,1,128,1,float16,float16,0,0.9201280275980631
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,fp8,0,0.531712015469869
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,32,8,128,1,float16,float16,0,1.0765759944915771
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,32,32,128,1,float16,float16,0,0.8314239978790283
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,32,4,128,1,float16,float16,0,1.0034879843393962
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,fp8,0,0.4933333396911621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,fp8,0,0.8243626753489176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,fp8,0,0.05161066850026449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,32,4,128,1,float16,float16,0,2.024693330128988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,32,1,128,1,float16,float16,0,0.47724799315134686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,fp8,0,2.850912094116211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,fp8,0,0.0897759993871053
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,fp8,0,0.22820266087849936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,32,2,128,1,float16,float16,0,0.5003840128580729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,32,4,128,1,float16,float16,0,0.5279093186060587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,fp8,0,0.35282135009765625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,32,32,128,1,float16,float16,0,0.4293813308080037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,fp8,0,1.3929813702901204
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,fp8,0,0.029114666084448498
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,fp8,0,0.03676799933115641
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,32,1,128,1,float16,float16,0,0.23094399770100912
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,32,4,128,1,float16,float16,0,0.25034666061401367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,32,8,128,1,float16,float16,0,0.2638559937477112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,32,2,128,1,float16,float16,0,0.2399253249168396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,fp8,0,0.6673226356506348
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,fp8,0,0.06651733318964641
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,fp8,0,0.020794666061798733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,fp8,0,0.1313813328742981
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,32,1,128,1,float16,float16,0,0.1272266705830892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,32,2,128,1,float16,float16,0,0.12999999523162842
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,32,32,128,1,float16,float16,0,0.16460266709327698
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,fp8,0,0.02613866577545802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,32,4,128,1,float16,float16,0,0.13363200426101685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,fp8,0,0.03915199885765711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,32,8,128,1,float16,float16,0,0.5445173184076945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,32,32,128,1,float16,float16,0,0.0844586690266927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,32,1,128,1,float16,float16,0,0.07884266475836436
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,32,8,128,1,float16,float16,0,0.13928000132242838
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,fp8,0,0.2876159946123759
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,fp8,0,0.07132266461849213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,32,4,128,1,float16,float16,0,0.08290666838486989
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,fp8,0,0.07549333572387695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,fp8,0,0.07956266899903615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,32,8,128,1,float16,float16,0,0.08221333225568135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,fp8,0,0.08943999807039897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,32,2,128,1,float16,float16,0,0.07979733248551686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,fp8,0,0.11498666803042094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,fp8,0,0.2320586641629537
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,fp8,0,3.532634735107422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,32,1,128,1,float16,float16,0,6.223157246907552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,fp8,0,3.9154027303059897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,fp8,0,4.686607996622722
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,32,2,128,1,float16,float16,0,6.3468373616536455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,32,4,128,1,float16,float16,0,6.981258392333984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,32,2,128,1,float16,float16,0,2.349775950113932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,32,32,128,1,float16,float16,0,6.421679814656575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,32,1,128,1,float16,float16,0,2.170719941457113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,fp8,0,1.7795252799987793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,fp8,0,8.04864501953125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,fp8,0,6.2665761311848955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,32,4,128,1,float16,float16,0,2.649631977081299
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,32,8,128,1,float16,float16,0,7.730282465616862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,fp8,0,1.9680479367574055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,fp8,0,2.5263733863830566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,32,1,128,1,float16,float16,0,1.0911040306091309
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,32,32,128,1,float16,float16,0,3.4866345723470054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,32,8,128,1,float16,float16,0,3.1695359547932944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,32,2,128,1,float16,float16,0,1.1684426466623943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,fp8,0,3.1518452962239585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,32,4,128,1,float16,float16,0,1.3125920295715332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,fp8,0,1.0290826956431072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,fp8,0,1.2232853571573894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,32,1,128,1,float16,float16,0,0.5665119886398315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,fp8,0,1.558176040649414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,fp8,0,2.077306588490804
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,32,8,128,1,float16,float16,0,1.5559840202331543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,32,2,128,1,float16,float16,0,0.596501350402832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,fp8,0,0.4643253485361735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,fp8,0,4.171866734822591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,32,4,128,1,float16,float16,0,0.6692213217417399
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,32,32,128,1,float16,float16,0,0.8047146797180176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,fp8,0,0.4561440149943034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,fp8,0,0.5245653390884399
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,32,32,128,1,float16,float16,0,1.6223413149515789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,fp8,0,0.050240000089009605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,32,1,128,1,float16,float16,0,0.3009120027224223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,fp8,0,0.8840533097585043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,32,8,128,1,float16,float16,0,0.7554240226745605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,fp8,0,0.0763626645008723
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,32,4,128,1,float16,float16,0,0.3403786818186442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,32,2,128,1,float16,float16,0,0.31670933961868286
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,32,8,128,1,float16,float16,0,0.3693759838740031
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,fp8,0,0.2246933380762736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,fp8,0,0.7758986949920654
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,fp8,0,0.30187199513117474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,32,32,128,1,float16,float16,0,0.3901439905166626
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,fp8,0,0.029109333952267964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,fp8,0,1.003007968266805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,32,1,128,1,float16,float16,0,0.13983466227849325
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,32,2,128,1,float16,float16,0,0.1446293294429779
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,fp8,0,0.055162668228149414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,32,8,128,1,float16,float16,0,0.17428267002105713
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,32,4,128,1,float16,float16,0,0.15773866573969522
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,32,32,128,1,float16,float16,0,0.10893332958221436
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,fp8,0,0.4639413356781006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,32,1,128,1,float16,float16,0,0.07774933179219563
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,fp8,0,0.022639999787012737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,fp8,0,0.02070933332045873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,fp8,0,0.0986293355623881
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,32,2,128,1,float16,float16,0,0.08025600016117096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,fp8,0,0.03279466678698858
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,32,8,128,1,float16,float16,0,0.08428266644477844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,32,4,128,1,float16,float16,0,0.08553600311279297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,fp8,0,0.034389334420363106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,fp8,0,0.19060266017913818
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,32,1,128,1,float16,float16,0,0.043663998444875084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,fp8,0,0.04721599817276001
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,fp8,0,0.07436266541481018
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,32,2,128,1,float16,float16,0,0.04419733087221781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,fp8,0,0.08320533235867818
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,32,32,128,1,float16,float16,0,0.05030933519204458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,fp8,0,0.07669866581757863
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,32,8,128,1,float16,float16,0,0.04606399933497111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,32,4,128,1,float16,float16,0,0.045647998650868736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,fp8,0,0.1620213290055593
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,fp8,0,0.09945600231488545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,32,2,128,1,float16,float16,0,0.026858667532602947
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,fp8,0,0.04470400015513102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,32,1,128,1,float16,float16,0,0.02722666660944621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,32,32,128,1,float16,float16,0,0.02994133283694585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,fp8,0,0.04474133253097534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,fp8,0,0.04996799925963084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,32,4,128,1,float16,float16,0,0.02826133370399475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,32,8,128,1,float16,float16,0,0.028255999088287354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,fp8,0,0.08975467085838318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,fp8,0,0.05274133384227753
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,fp8,0,1.8959627151489258
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,fp8,0,1.944591999053955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,32,1,128,1,float16,float16,0,2.0589332580566406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,32,2,128,1,float16,float16,0,2.300831953684489
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,fp8,0,2.3604000409444175
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,32,4,128,1,float16,float16,0,2.5309813817342124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,32,1,128,1,float16,float16,0,0.921994686126709
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,32,8,128,1,float16,float16,0,3.1527093251546225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,32,32,128,1,float16,float16,0,3.2392425537109375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,fp8,0,3.1488478978474936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,fp8,0,0.8873866399129232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,fp8,0,3.4261013666788735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,32,2,128,1,float16,float16,0,0.9457653363545736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,fp8,0,1.025712013244629
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,fp8,0,1.2327306270599365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,32,32,128,1,float16,float16,0,1.6286773681640625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,32,1,128,1,float16,float16,0,0.4203999837239583
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,32,4,128,1,float16,float16,0,1.115114688873291
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,fp8,0,0.45606398582458496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,fp8,0,1.5655892690022786
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,32,2,128,1,float16,float16,0,0.5040106773376465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,32,4,128,1,float16,float16,0,0.542469342549642
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,32,8,128,1,float16,float16,0,1.4239306449890137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,fp8,0,1.7499465942382812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,fp8,0,0.46348265806833905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,fp8,0,0.5262080033620199
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,32,32,128,1,float16,float16,0,0.8036266962687174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,32,8,128,1,float16,float16,0,0.7120693524678549
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,fp8,0,0.05461333195368449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,32,1,128,1,float16,float16,0,0.22596800327301025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,32,2,128,1,float16,float16,0,0.2302560011545817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,fp8,0,0.7786346276601156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,fp8,0,0.10046399633089702
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,32,4,128,1,float16,float16,0,0.26331732670466107
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,fp8,0,0.20843199888865152
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,fp8,0,0.27002133925755817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,32,1,128,1,float16,float16,0,0.09757866462071736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,32,8,128,1,float16,float16,0,0.36325331528981525
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,fp8,0,0.02914133419593175
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,32,2,128,1,float16,float16,0,0.10162132978439331
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,fp8,0,0.365450660387675
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,fp8,0,0.03398933261632919
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,32,32,128,1,float16,float16,0,0.09071466326713562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,32,32,128,1,float16,float16,0,0.3868906497955322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,32,8,128,1,float16,float16,0,0.11628799637158711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,32,4,128,1,float16,float16,0,0.11131733655929565
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,fp8,0,0.07282666862010956
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,fp8,0,0.05055999755859375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,fp8,0,0.797813336054484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,fp8,0,0.030063999195893604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,fp8,0,0.020975999534130096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,32,1,128,1,float16,float16,0,0.054330666859944664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,fp8,0,0.1301866670449575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,32,4,128,1,float16,float16,0,0.05989866455396017
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,32,2,128,1,float16,float16,0,0.055231998364130654
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,fp8,0,0.023039999107519787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,32,8,128,1,float16,float16,0,0.06025599936644236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,32,2,128,1,float16,float16,0,0.031104000906149547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,32,1,128,1,float16,float16,0,0.030576000610987347
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,fp8,0,0.07492800056934357
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,fp8,0,0.07554666697978973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,32,32,128,1,float16,float16,0,0.036677333215872444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,fp8,0,0.037818667789300285
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,32,4,128,1,float16,float16,0,0.033514666060606636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,fp8,0,0.07912000020345052
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,32,8,128,1,float16,float16,0,0.03363733241955439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,fp8,0,0.08841600020726521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,fp8,0,0.13748799761136374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,32,32,128,1,float16,float16,0,0.02229333420594533
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,32,2,128,1,float16,float16,0,0.01904533306757609
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,32,4,128,1,float16,float16,0,0.02041600023706754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,32,1,128,1,float16,float16,0,0.01903466631968816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,fp8,0,0.04461333155632019
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,fp8,0,0.04450666904449463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,32,8,128,1,float16,float16,0,0.020538666596015293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,fp8,0,0.07161066432793935
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,fp8,0,0.04608533283074697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,32,1,128,1,float16,float16,0,0.01231466606259346
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,32,32,128,1,float16,float16,0,0.013466666142145792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,fp8,0,0.04950400193532308
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,fp8,0,0.02926933268706004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,32,8,128,1,float16,float16,0,0.012458667159080505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,32,4,128,1,float16,float16,0,0.012416000167528788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,fp8,0,0.02943466603755951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,fp8,0,0.02914133419593175
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,32,2,128,1,float16,float16,0,0.012250666817029318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,fp8,0,0.0324799989660581
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,fp8,0,0.040074666341145836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,fp8,0,0.8872000376383463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,32,1,128,1,float16,float16,0,0.9271946748097738
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,32,4,128,1,float16,float16,0,1.1621867020924885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,fp8,0,1.166378657023112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,32,8,128,1,float16,float16,0,1.5019359588623047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,fp8,0,0.9677440325419108
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,fp8,0,1.599738597869873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,32,2,128,1,float16,float16,0,0.9961386521657308
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,32,32,128,1,float16,float16,0,1.6276373863220215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,32,1,128,1,float16,float16,0,0.43966933091481525
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,fp8,0,1.6909759839375813
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,fp8,0,0.42823465665181476
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,fp8,0,0.455077330271403
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,32,8,128,1,float16,float16,0,0.7400906880696615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,32,2,128,1,float16,float16,0,0.503653327624003
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,fp8,0,0.7439839839935303
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,32,4,128,1,float16,float16,0,0.5540266831715902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,32,1,128,1,float16,float16,0,0.20805867513020834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,fp8,0,0.7066346804300944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,fp8,0,0.05117866893609365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,32,2,128,1,float16,float16,0,0.21926399072011313
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,32,4,128,1,float16,float16,0,0.25624533494313556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,fp8,0,0.07528000076611836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,32,8,128,1,float16,float16,0,0.3511413335800171
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,32,32,128,1,float16,float16,0,0.36163198947906494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,fp8,0,0.2796693245569865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,32,32,128,1,float16,float16,0,0.8047306537628174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,fp8,0,0.5649760166803995
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,fp8,0,0.2935840090115865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,32,1,128,1,float16,float16,0,0.07793599863847096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,32,2,128,1,float16,float16,0,0.08142933249473572
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,32,8,128,1,float16,float16,0,0.09775466720263164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,fp8,0,0.21826666593551636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,fp8,0,0.02938133229811986
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,32,4,128,1,float16,float16,0,0.09052800138791402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,32,32,128,1,float16,float16,0,0.07189333438873291
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,fp8,0,0.050517335534095764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,fp8,0,0.0626986672480901
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,fp8,0,0.02089600016673406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,32,1,128,1,float16,float16,0,0.04228266576925913
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,32,4,128,1,float16,float16,0,0.04788800080617269
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,fp8,0,0.02258133391539256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,32,2,128,1,float16,float16,0,0.0439626673857371
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,fp8,0,0.02991466720898946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,32,8,128,1,float16,float16,0,0.04817600051561991
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,fp8,0,0.03438399980465571
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,32,1,128,1,float16,float16,0,0.024533333877722423
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,fp8,0,0.09907199939092
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,32,32,128,1,float16,float16,0,0.03051200012365977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,fp8,0,0.034202667574087776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,32,2,128,1,float16,float16,0,0.025098666548728943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,fp8,0,0.01743999992807706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,fp8,0,0.01661866654952367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,fp8,0,0.052970667680104576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,fp8,0,0.021642667551835377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,32,32,128,1,float16,float16,0,0.017423999806245167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,fp8,0,0.020506666352351505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,32,8,128,1,float16,float16,0,0.027466667195161183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,fp8,0,0.01441066712141037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,32,1,128,1,float16,float16,0,0.01470400020480156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,fp8,0,0.014858666807413101
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,32,2,128,1,float16,float16,0,0.01463466634353002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,32,4,128,1,float16,float16,0,0.027552001178264618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,fp8,0,0.02939733366171519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,fp8,0,0.01590399940808614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,32,8,128,1,float16,float16,0,0.015706667055686314
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,32,4,128,1,float16,float16,0,0.01552533358335495
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,fp8,0,0.016656000167131424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,fp8,0,0.013477332890033722
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,32,2,128,1,float16,float16,0,0.009786666681369146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,32,32,128,1,float16,float16,0,0.01118933285276095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,fp8,0,0.013552000125249227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,32,1,128,1,float16,float16,0,0.010101333260536194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,32,8,128,1,float16,float16,0,0.010079999764760336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,fp8,0,0.01369599997997284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,fp8,0,0.01945066700379054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,32,4,128,1,float16,float16,0,0.009888000165422758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,32,1,128,1,float16,float16,0,0.009008000294367472
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,fp8,0,0.013674666484196981
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,32,32,128,1,float16,float16,0,0.00943999985853831
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,32,4,128,1,float16,float16,0,0.00902399979531765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,32,2,128,1,float16,float16,0,0.009072000160813332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,32,8,128,1,float16,float16,0,0.00915733352303505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,fp8,0,0.013130666067202887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,fp8,0,0.01786133274435997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,fp8,0,0.013327999661366144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,32,1,128,1,float16,float16,0,0.45535465081532794
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,fp8,0,0.42745065689086914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,32,4,128,1,float16,float16,0,0.5672373374303182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,32,2,128,1,float16,float16,0,0.5493119955062866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,fp8,0,0.49560534954071045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,32,8,128,1,float16,float16,0,0.7440586884816488
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,fp8,0,0.7331786950429281
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,32,32,128,1,float16,float16,0,0.8034559885660807
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,fp8,0,0.5650453170140585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,32,1,128,1,float16,float16,0,0.21067200104395548
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,fp8,0,0.6982879638671875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,fp8,0,0.05180266499519348
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,32,8,128,1,float16,float16,0,0.3501226504643758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,32,2,128,1,float16,float16,0,0.21918400128682455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,fp8,0,0.07496533294518788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,fp8,0,0.27051732937494916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,32,32,128,1,float16,float16,0,0.3641759951909383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,32,1,128,1,float16,float16,0,0.07221866647402446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,32,4,128,1,float16,float16,0,0.25625600417455036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,fp8,0,0.2823573350906372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,fp8,0,0.05003199974695841
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,fp8,0,0.21851734320322672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,fp8,0,0.029045333464940388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,32,2,128,1,float16,float16,0,0.07518399755160014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,32,4,128,1,float16,float16,0,0.08323200047016144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,32,8,128,1,float16,float16,0,0.09755200147628784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,32,32,128,1,float16,float16,0,0.058933332562446594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,32,1,128,1,float16,float16,0,0.039919999738534294
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,fp8,0,0.020821332931518555
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,32,2,128,1,float16,float16,0,0.04177066683769226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,fp8,0,0.03404266635576884
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,fp8,0,0.06285333136717479
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,fp8,0,0.029557332396507263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,fp8,0,0.02276800076166789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,32,8,128,1,float16,float16,0,0.04576000074545542
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,fp8,0,0.07349333167076111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,32,4,128,1,float16,float16,0,0.04555733501911163
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,32,1,128,1,float16,float16,0,0.022869333624839783
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,fp8,0,0.03451200077931086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,fp8,0,0.016442666451136272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,32,32,128,1,float16,float16,0,0.028688001135985058
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,fp8,0,0.04001066585381826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,32,4,128,1,float16,float16,0,0.025413334369659424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,32,8,128,1,float16,float16,0,0.025472000241279602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,fp8,0,0.0174346665541331
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,fp8,0,0.020400000115235645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,32,1,128,1,float16,float16,0,0.013872000078360239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,32,32,128,1,float16,float16,0,0.016623999923467636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,fp8,0,0.01452800010641416
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,fp8,0,0.021850667893886566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,32,2,128,1,float16,float16,0,0.02329600105683009
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,32,4,128,1,float16,float16,0,0.014885333677132925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,32,2,128,1,float16,float16,0,0.013797332843144735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,fp8,0,0.015935999651749928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,fp8,0,0.023226665953795116
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,32,8,128,1,float16,float16,0,0.014912000546852747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,32,32,128,1,float16,float16,0,0.010591999938090643
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,fp8,0,0.014533333480358124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,fp8,0,0.016496000190575916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,32,1,128,1,float16,float16,0,0.009370666618148485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,32,4,128,1,float16,float16,0,0.009514666472872099
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,32,2,128,1,float16,float16,0,0.009381333366036415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,fp8,0,0.01339200014869372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,fp8,0,0.013679999858140945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,fp8,0,0.013541333377361298
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,32,8,128,1,float16,float16,0,0.009589333087205887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,32,1,128,1,float16,float16,0,0.008581333483258883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,fp8,0,0.015520000209410986
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,fp8,0,0.01357866699496905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,32,32,128,1,float16,float16,0,0.009312000125646591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,32,2,128,1,float16,float16,0,0.008746666833758354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,32,8,128,1,float16,float16,0,0.008938666433095932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,32,4,128,1,float16,float16,0,0.008842666943868002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,fp8,0,0.014074667046467463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,32,32,128,1,float16,float16,0,0.008757333581646284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,32,1,128,1,float16,float16,0,0.008341333518425623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,fp8,0,0.01250133290886879
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,32,2,128,1,float16,float16,0,0.008314666648705801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,32,4,128,1,float16,float16,0,0.008517333616813024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,32,8,128,1,float16,float16,0,0.008421333506703377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,fp8,0,0.013637332866589228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,fp8,0,0.07734933495521545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,32,1,128,1,float16,float16,0,0.2726186712582906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,32,4,128,1,float16,float16,0,0.3134559988975525
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,32,2,128,1,float16,float16,0,0.28679466247558594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,fp8,0,0.15219733119010925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,fp8,0,0.255349338054657
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,32,8,128,1,float16,float16,0,0.3543999989827474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,32,32,128,1,float16,float16,0,0.35998932520548504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,32,1,128,1,float16,float16,0,0.12306132912635803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,fp8,0,0.033610666791598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,32,2,128,1,float16,float16,0,0.1270240048567454
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,fp8,0,0.36955201625823975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,fp8,0,0.34178133805592853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,fp8,0,0.03932266682386398
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,32,8,128,1,float16,float16,0,0.1446506679058075
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,32,4,128,1,float16,float16,0,0.13409599661827087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,fp8,0,0.06555200119813283
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,32,32,128,1,float16,float16,0,0.0883733332157135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,fp8,0,0.02436800052722295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,fp8,0,0.09217600027720134
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,32,2,128,1,float16,float16,0,0.06666666766007741
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,fp8,0,0.026602665583292644
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,32,8,128,1,float16,float16,0,0.07051733136177063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,32,4,128,1,float16,float16,0,0.07034666836261749
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,32,32,128,1,float16,float16,0,0.041877334316571556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,32,1,128,1,float16,float16,0,0.06478400031725566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,fp8,0,0.04903466502825419
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,fp8,0,0.12903466820716858
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,fp8,0,0.035071998834609985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,32,1,128,1,float16,float16,0,0.03605866680542628
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,fp8,0,0.06857599814732869
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,fp8,0,0.021210665504137676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,32,4,128,1,float16,float16,0,0.03880000114440918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,fp8,0,0.020074666788180668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,32,8,128,1,float16,float16,0,0.03892799963553747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,32,2,128,1,float16,float16,0,0.036864000062147774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,fp8,0,0.026560001075267792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,32,1,128,1,float16,float16,0,0.020373333245515823
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,32,4,128,1,float16,float16,0,0.021322667598724365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,fp8,0,0.018901333212852478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,fp8,0,0.02438933402299881
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,32,2,128,1,float16,float16,0,0.020469332734743755
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,fp8,0,0.018112000077962875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,32,32,128,1,float16,float16,0,0.023408000667889912
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,fp8,0,0.03756266583998998
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,fp8,0,0.019600000232458115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,32,8,128,1,float16,float16,0,0.021381333470344543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,32,32,128,1,float16,float16,0,0.013679999858140945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,32,2,128,1,float16,float16,0,0.01257066677014033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,32,1,128,1,float16,float16,0,0.012565333396196365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,fp8,0,0.02040533348917961
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,fp8,0,0.017903999735911686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,fp8,0,0.017877332866191864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,fp8,0,0.020687999824682873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,32,4,128,1,float16,float16,0,0.012736000120639801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,32,32,128,1,float16,float16,0,0.009114666531483332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,32,1,128,1,float16,float16,0,0.008618666479984919
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,32,8,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,32,4,128,1,float16,float16,0,0.00867733359336853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,fp8,0,0.017407999684413273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,32,8,128,1,float16,float16,0,0.00860799973209699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,32,2,128,1,float16,float16,0,0.008517333616813024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,fp8,0,0.017263999829689663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,fp8,0,0.01764800027012825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,32,32,128,1,float16,float16,0,0.008496000121037165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,fp8,0,0.01681600014368693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,fp8,0,0.018207999567190807
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,fp8,0,0.016949333250522614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,32,1,128,1,float16,float16,0,0.008277333031098047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,32,2,128,1,float16,float16,0,0.008293333152929941
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,32,4,128,1,float16,float16,0,0.008298666526873907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,32,8,128,1,float16,float16,0,0.008298666526873907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,32,32,128,1,float16,float16,0,0.008416000132759413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,fp8,0,0.017237332959969837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,fp8,0,0.01754666616519292
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,fp8,0,0.017290666699409485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,32,1,128,1,float16,float16,0,0.008047999814152718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,32,2,128,1,float16,float16,0,0.008127999802430471
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,fp8,0,0.016522667060295742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,32,4,128,1,float16,float16,0,0.008063999935984612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,fp8,0,0.017562666287024815
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,32,8,128,1,float16,float16,0,0.008154666672150293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,fp8,0,0.017018667111794155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,fp8,0,0.01716800034046173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,32,2,128,1,float16,float16,0,0.22957332928975424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,fp8,0,0.09046399593353271
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,fp8,0,0.03536533315976461
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,32,1,128,1,float16,float16,0,0.2265119949976603
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,fp8,0,0.05277333160241445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,32,4,128,1,float16,float16,0,0.23987199862798056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,32,8,128,1,float16,float16,0,0.24989332755406699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,32,32,128,1,float16,float16,0,0.145797332127889
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,32,1,128,1,float16,float16,0,0.11608533064524333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,fp8,0,0.025098666548728943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,32,4,128,1,float16,float16,0,0.12104533116022746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,32,2,128,1,float16,float16,0,0.1172160009543101
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,fp8,0,0.2017013430595398
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,fp8,0,0.02865600089232127
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,fp8,0,0.14297067125638327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,32,32,128,1,float16,float16,0,0.06644799808661143
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,32,8,128,1,float16,float16,0,0.12176000078519185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,fp8,0,0.04807466765244802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,fp8,0,0.02024000013868014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,fp8,0,0.1053653359413147
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,fp8,0,0.07226666808128357
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,32,2,128,1,float16,float16,0,0.061066667238871254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,32,4,128,1,float16,float16,0,0.06326933205127716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,fp8,0,0.021695998807748158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,32,1,128,1,float16,float16,0,0.06035199761390686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,fp8,0,0.026101333399613697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,32,8,128,1,float16,float16,0,0.06311466793219249
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,32,1,128,1,float16,float16,0,0.033941333492596946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,32,32,128,1,float16,float16,0,0.03692800054947535
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,fp8,0,0.01811733345190684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,32,2,128,1,float16,float16,0,0.034074666599432625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,fp8,0,0.03989866624275843
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,fp8,0,0.0584746648867925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,fp8,0,0.018170667191346485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,fp8,0,0.02004266654451688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,32,4,128,1,float16,float16,0,0.03513599932193756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,32,8,128,1,float16,float16,0,0.035232000052928925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,32,32,128,1,float16,float16,0,0.020608000457286835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,fp8,0,0.022426667312781017
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,32,2,128,1,float16,float16,0,0.019482667247454327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,32,1,128,1,float16,float16,0,0.019445333629846573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,fp8,0,0.01793066660563151
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,fp8,0,0.01720000058412552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,32,4,128,1,float16,float16,0,0.019679999599854153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,32,8,128,1,float16,float16,0,0.019776000330845516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,fp8,0,0.01785600061217944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,fp8,0,0.03254933406909307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,32,1,128,1,float16,float16,0,0.012096000214417776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,fp8,0,0.017477333545684814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,32,32,128,1,float16,float16,0,0.012458667159080505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,fp8,0,0.017360000560681026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,fp8,0,0.019317333896954853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,32,2,128,1,float16,float16,0,0.012063999970753988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,32,4,128,1,float16,float16,0,0.012080000092585882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,fp8,0,0.016650666793187458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,32,8,128,1,float16,float16,0,0.012037333101034164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,32,32,128,1,float16,float16,0,0.00855466661353906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,fp8,0,0.016986666868130367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,32,1,128,1,float16,float16,0,0.008330666770537695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,32,4,128,1,float16,float16,0,0.008277333031098047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,32,2,128,1,float16,float16,0,0.0081386665503184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,fp8,0,0.01716800034046173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,32,8,128,1,float16,float16,0,0.008330666770537695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,fp8,0,0.01729600007335345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,fp8,0,0.01781333362062772
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,fp8,0,0.01651200031240781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,32,1,128,1,float16,float16,0,0.008058666562040647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,32,32,128,1,float16,float16,0,0.008074666683872541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,32,2,128,1,float16,float16,0,0.007930666829148928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,32,4,128,1,float16,float16,0,0.008074666683872541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,fp8,0,0.016688000410795212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,fp8,0,0.01747200017174085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,fp8,0,0.016976000120242436
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,32,8,128,1,float16,float16,0,0.007978666573762894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,32,1,128,1,float16,float16,0,0.007829333345095316
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,32,32,128,1,float16,float16,0,0.008298666526873907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,32,2,128,1,float16,float16,0,0.007818666597207388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,fp8,0,0.017045332739750545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,fp8,0,0.01661866654952367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,32,4,128,1,float16,float16,0,0.008037333066264788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,fp8,0,0.016501333564519882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,32,8,128,1,float16,float16,0,0.007925333455204964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,fp8,0,0.0174346665541331
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,fp8,0,0.017045332739750545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,32,1,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,32,1,128,1,float16,float16,0,0.014101333916187286
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,32,2,128,1,float16,float16,0,0.027029333015282948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,0,0.02314666658639908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,32,4,128,1,float16,float16,0,0.04119999955097834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,0,0.03475199888149897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,32,8,128,1,float16,float16,0,0.06555200119813283
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,32,1,128,1,float16,float16,0,0.009712000067035357
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,32,1,128,1,float16,fp8,0,0.013434667140245438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,32,2,128,1,float16,float16,0,0.018016000588734944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,32,32,128,1,float16,float16,0,0.10496532917022705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,0,0.01570133368174235
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,fp8,0,0.09197866916656494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,0,0.05438933273156484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,32,4,128,1,float16,float16,0,0.02499199906984965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,0,0.02149333308140437
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,32,32,128,1,float16,float16,0,0.05569066603978475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,32,8,128,1,float16,float16,0,0.038736000657081604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,32,1,128,1,float16,float16,0,0.009477333476146063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,0,0.0330826664964358
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,32,2,128,1,float16,float16,0,0.013552000125249227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,fp8,0,0.05221866567929586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,0,0.012730666746695837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,32,1,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,32,4,128,1,float16,float16,0,0.017152000218629837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,0,0.01470400020480156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,32,8,128,1,float16,float16,0,0.024133334557215374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,32,32,128,1,float16,float16,0,0.031701333820819855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,32,1,128,1,float16,float16,0,0.009178666397929192
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,32,1,128,1,float16,fp8,0,0.011813333878914515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,0,0.02072000006834666
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,0,0.01209066684047381
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,32,4,128,1,float16,float16,0,0.01302933320403099
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,0,0.012047999848922094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,32,2,128,1,float16,float16,0,0.012853333105643591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,32,8,128,1,float16,float16,0,0.016629333297411602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,32,1,128,1,float16,float16,0,0.008992000172535578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,32,32,128,1,float16,float16,0,0.01842133328318596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,fp8,0,0.03223466624816259
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,0,0.014149333039919535
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,32,1,128,1,float16,fp8,0,0.01156266654531161
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,0,0.011429333438475927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,32,2,128,1,float16,float16,0,0.012746666868527731
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,32,4,128,1,float16,float16,0,0.012805332740147909
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,fp8,0,0.021168000996112823
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,32,8,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,32,32,128,1,float16,float16,0,0.011445333560307821
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,0,0.011546666423479715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,0,0.013536000003417334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,32,1,128,1,float16,fp8,0,0.011424000064531961
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,32,2,128,1,float16,float16,0,0.012554666648308435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,32,4,128,1,float16,float16,0,0.012522666404644648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,32,1,128,1,float16,float16,0,0.008858666444818178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,0,0.011178666104873022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,32,8,128,1,float16,float16,0,0.012666666259368261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,0,0.011429333438475927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,32,32,128,1,float16,float16,0,0.0080960001796484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,fp8,0,0.01573333392540614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,32,1,128,1,float16,float16,0,0.008645333349704742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,0,0.013514666507641474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,32,2,128,1,float16,float16,0,0.012063999970753988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,32,1,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,32,4,128,1,float16,float16,0,0.012170666207869848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,0,0.01108266661564509
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,0,0.011296000331640244
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,32,32,128,1,float16,float16,0,0.00786666696270307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,32,1,128,1,float16,float16,0,0.008703999842206636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,32,8,128,1,float16,float16,0,0.012080000092585882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,32,2,128,1,float16,float16,0,0.012058666596810022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,32,1,128,1,float16,fp8,0,0.010821333775917688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,32,4,128,1,float16,float16,0,0.01221866657336553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,32,8,128,1,float16,float16,0,0.012106666962305704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,fp8,0,0.014373333503802618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,0,0.01098666712641716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,32,32,128,1,float16,float16,0,0.007882666463653246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,0,0.01129066695769628
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,32,1,128,1,float16,float16,0,0.00854399986565113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,32,2,128,1,float16,float16,0,0.008512000242869059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,32,1,128,1,float16,fp8,0,0.010869332899649939
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,0,0.01081066702802976
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,32,4,128,1,float16,float16,0,0.008645333349704742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,32,8,128,1,float16,float16,0,0.008602666358153025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,0,0.010746666540702185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,fp8,0,3.229333241780599
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,fp8,0,4.975706736246745
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,fp8,0,11.912464141845703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,fp8,0,40.1001230875651
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,fp8,0,1.6391679445902507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,24,24,128,1,float16,float16,0,41.48949940999349
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,fp8,0,2.612175941467285
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,24,1,128,1,float16,float16,0,38.91726430257162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,fp8,0,5.7507680257161455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,24,1,128,1,float16,float16,0,79.98731486002605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,24,2,128,1,float16,float16,0,82.08870442708333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,24,4,128,1,float16,float16,0,80.63305155436198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,24,8,128,1,float16,float16,0,82.48324584960938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,24,2,128,1,float16,float16,0,41.11009724934896
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,fp8,0,0.8629493713378906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,24,4,128,1,float16,float16,0,42.56100209554037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,fp8,0,1.3645440737406414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,fp8,0,21.2445068359375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,24,24,128,1,float16,float16,0,22.64488474527995
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,fp8,0,90.3795166015625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,24,1,128,1,float16,float16,0,21.181477864583332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,24,8,128,1,float16,float16,0,41.92395273844401
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,fp8,0,3.023200035095215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,24,2,128,1,float16,float16,0,19.681082407633465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,fp8,0,0.44516265392303467
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,24,1,128,1,float16,float16,0,9.799493153889975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,24,24,128,1,float16,float16,0,10.215797424316406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,fp8,0,10.447903951009115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,fp8,0,0.7166666984558105
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,24,4,128,1,float16,float16,0,19.920389811197918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,fp8,0,1.5924053192138672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,24,2,128,1,float16,float16,0,9.914010365804037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,fp8,0,4.932474772135417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,fp8,0,2.333301385243734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,24,8,128,1,float16,float16,0,20.610815684000652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,24,4,128,1,float16,float16,0,9.701221466064453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,24,8,128,1,float16,float16,0,9.92784563700358
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,fp8,0,43.38872782389323
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,fp8,0,3.281514803568522
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,fp8,0,22.242897033691406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,fp8,0,6.951802571614583
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,fp8,0,23.397440592447918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,fp8,0,1.1884586811065674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,24,24,128,1,float16,float16,0,24.041392008463543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,fp8,0,1.7194612820943196
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,24,1,128,1,float16,float16,0,22.639231363932293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,fp8,0,3.727333386739095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,24,1,128,1,float16,float16,0,45.509521484375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,24,2,128,1,float16,float16,0,47.6490478515625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,24,4,128,1,float16,float16,0,49.09454345703125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,24,8,128,1,float16,float16,0,47.3507080078125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,24,2,128,1,float16,float16,0,23.085835774739582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,fp8,0,0.6307040055592855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,fp8,0,12.560255686442057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,24,4,128,1,float16,float16,0,23.613375345865887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,fp8,0,0.9953493277231852
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,fp8,0,49.491048177083336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,fp8,0,1.8182560602823894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,24,24,128,1,float16,float16,0,12.634485880533854
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,24,8,128,1,float16,float16,0,24.421780904134113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,24,1,128,1,float16,float16,0,11.656837463378906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,24,2,128,1,float16,float16,0,11.437615712483725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,fp8,0,0.2022506594657898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,fp8,0,5.680880228678386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,24,4,128,1,float16,float16,0,11.807573954264322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,fp8,0,0.4922826687494914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,24,24,128,1,float16,float16,0,5.3622080485026045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,24,1,128,1,float16,float16,0,5.571594874064128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,fp8,0,1.0252479712168376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,24,8,128,1,float16,float16,0,12.552389780680338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,24,2,128,1,float16,float16,0,4.929957389831543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,fp8,0,2.9658025105794272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,24,4,128,1,float16,float16,0,5.330154736836751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,fp8,0,1.9772586822509766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,24,8,128,1,float16,float16,0,5.335776011149089
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,fp8,0,12.265050252278646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,fp8,0,2.6137653986612954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,fp8,0,25.94378662109375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,fp8,0,5.105776151021321
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,fp8,0,16.897253672281902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,24,24,128,1,float16,float16,0,17.282122294108074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,fp8,0,0.9801066716512045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,fp8,0,1.4384479522705078
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,24,1,128,1,float16,float16,0,32.17373911539713
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,24,2,128,1,float16,float16,0,31.363438924153645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,fp8,0,2.8451627095540366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,24,4,128,1,float16,float16,0,32.96442667643229
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,24,8,128,1,float16,float16,0,33.34784444173177
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,24,1,128,1,float16,float16,0,16.2947514851888
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,24,2,128,1,float16,float16,0,16.390832265218098
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,fp8,0,0.5018986860911051
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,24,4,128,1,float16,float16,0,15.968560536702475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,fp8,0,8.422554651896158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,fp8,0,0.7404747009277344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,24,24,128,1,float16,float16,0,8.269744237263998
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,fp8,0,35.12494913736979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,fp8,0,1.3407306671142578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,24,1,128,1,float16,float16,0,7.138816197713216
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,24,8,128,1,float16,float16,0,16.106948852539062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,24,2,128,1,float16,float16,0,7.709775924682617
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,fp8,0,0.14040533701578775
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,24,24,128,1,float16,float16,0,3.545642534891764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,fp8,0,4.0267839431762695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,24,4,128,1,float16,float16,0,7.8615678151448565
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,fp8,0,0.2940373420715332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,24,1,128,1,float16,float16,0,3.445023854573568
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,fp8,0,0.8228586514790853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,24,8,128,1,float16,float16,0,7.920890808105469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,24,2,128,1,float16,float16,0,3.321653366088867
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,fp8,0,17.495770772298176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,fp8,0,2.1057653427124023
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,24,8,128,1,float16,float16,0,3.352245330810547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,24,4,128,1,float16,float16,0,3.1844053268432617
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,fp8,0,8.449034372965494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,fp8,0,2.958815892537435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,fp8,0,4.147493362426758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,fp8,0,7.215066909790039
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,fp8,0,21.622202555338543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,24,24,128,1,float16,float16,0,22.169087727864582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,fp8,0,1.5730400085449219
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,24,1,128,1,float16,float16,0,20.540506998697918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,fp8,0,2.081098715464274
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,fp8,0,3.6506665547688804
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,24,1,128,1,float16,float16,0,45.542816162109375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,24,2,128,1,float16,float16,0,44.41028849283854
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,24,4,128,1,float16,float16,0,42.75458780924479
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,24,8,128,1,float16,float16,0,44.85815938313802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,24,2,128,1,float16,float16,0,21.43877919514974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,fp8,0,0.7746079762776693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,24,4,128,1,float16,float16,0,21.660964965820312
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,fp8,0,1.0871466795603435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,fp8,0,45.18228658040365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,fp8,0,11.175525665283203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,24,24,128,1,float16,float16,0,11.297684987386068
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,24,1,128,1,float16,float16,0,10.11848513285319
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,fp8,0,2.204064051310221
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,24,8,128,1,float16,float16,0,21.5816167195638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,24,2,128,1,float16,float16,0,10.266885121663412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,fp8,0,0.3951093355814616
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,fp8,0,5.523530960083008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,24,24,128,1,float16,float16,0,5.224682807922363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,fp8,0,0.5368106762568156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,24,1,128,1,float16,float16,0,4.65937614440918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,24,4,128,1,float16,float16,0,10.574015935262045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,24,8,128,1,float16,float16,0,10.678826649983725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,fp8,0,0.9617973168691
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,24,2,128,1,float16,float16,0,4.519413312276204
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,fp8,0,22.39983367919922
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,fp8,0,2.766474723815918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,fp8,0,0.09345066547393799
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,24,1,128,1,float16,float16,0,2.263141314188639
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,24,4,128,1,float16,float16,0,4.364154815673828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,24,24,128,1,float16,float16,0,2.5524853070576987
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,fp8,0,10.52014414469401
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,fp8,0,0.19429334004720053
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,24,8,128,1,float16,float16,0,4.835989316304524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,fp8,0,0.553109327952067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,24,2,128,1,float16,float16,0,2.1715946197509766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,24,4,128,1,float16,float16,0,2.3469066619873047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,fp8,0,4.719087918599446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,fp8,0,1.5196159680684407
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,24,8,128,1,float16,float16,0,2.187898635864258
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,fp8,0,2.170501391092936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,fp8,0,2.7594401041666665
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,fp8,0,4.719519933064778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,24,24,128,1,float16,float16,0,13.40386708577474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,fp8,0,13.397876739501953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,24,1,128,1,float16,float16,0,11.601226806640625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,fp8,0,1.1553813616434734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,fp8,0,1.427029291788737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,24,1,128,1,float16,float16,0,24.524757385253906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,fp8,0,2.5691307385762534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,24,2,128,1,float16,float16,0,24.144810994466145
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,24,4,128,1,float16,float16,0,25.168047587076824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,24,8,128,1,float16,float16,0,26.79065704345703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,24,2,128,1,float16,float16,0,12.026522318522135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,24,4,128,1,float16,float16,0,12.516021728515625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,fp8,0,26.015045166015625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,fp8,0,6.148256301879883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,24,24,128,1,float16,float16,0,6.122896194458008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,fp8,0,0.5569066603978475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,fp8,0,0.7079306443532308
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,24,1,128,1,float16,float16,0,5.094575881958008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,fp8,0,1.194106658299764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,24,8,128,1,float16,float16,0,12.703083038330078
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,24,2,128,1,float16,float16,0,5.441408157348633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,24,4,128,1,float16,float16,0,5.173525174458821
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,24,24,128,1,float16,float16,0,2.806389490763346
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,24,8,128,1,float16,float16,0,5.324570655822754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,fp8,0,3.845434824625651
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,fp8,0,0.17748266458511353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,fp8,0,0.35205332438151044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,24,1,128,1,float16,float16,0,2.61297607421875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,fp8,0,13.093952178955078
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,fp8,0,0.6654986540476481
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,24,2,128,1,float16,float16,0,2.4169653256734214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,24,4,128,1,float16,float16,0,2.521077315012614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,24,24,128,1,float16,float16,0,1.5910080273946126
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,fp8,0,1.6199199358622234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,fp8,0,6.200165430704753
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,24,8,128,1,float16,float16,0,2.591503938039144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,fp8,0,0.06564799944559734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,24,1,128,1,float16,float16,0,1.18777600924174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,fp8,0,0.12436800201733907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,fp8,0,0.3805653254191081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,24,4,128,1,float16,float16,0,1.39463472366333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,24,2,128,1,float16,float16,0,1.2371359666188557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,24,8,128,1,float16,float16,0,1.218384027481079
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,fp8,0,0.7699039777119955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,fp8,0,2.850543975830078
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,fp8,0,2.83075745900472
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,fp8,0,3.6005226771036782
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,fp8,0,5.611130396525065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,fp8,0,12.716810862223307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,24,24,128,1,float16,float16,0,12.57800547281901
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,fp8,0,1.4167893727620442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,fp8,0,1.7189280192057292
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,24,1,128,1,float16,float16,0,22.5143305460612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,24,4,128,1,float16,float16,0,23.658485412597656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,24,2,128,1,float16,float16,0,23.292879740397137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,24,8,128,1,float16,float16,0,23.954694112141926
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,24,1,128,1,float16,float16,0,10.612560272216797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,fp8,0,2.7184906005859375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,24,2,128,1,float16,float16,0,11.171408335367838
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,fp8,0,6.382133483886719
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,24,24,128,1,float16,float16,0,5.830064137776692
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,24,1,128,1,float16,float16,0,4.652309417724609
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,24,4,128,1,float16,float16,0,10.45788828531901
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,fp8,0,25.05975341796875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,fp8,0,0.7525493303934733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,fp8,0,0.9175146420796713
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,24,8,128,1,float16,float16,0,11.507984161376953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,fp8,0,1.4451252619425456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,24,2,128,1,float16,float16,0,4.931599934895833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,fp8,0,0.348960002263387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,24,24,128,1,float16,float16,0,2.7994187672932944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,fp8,0,3.095600128173828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,24,4,128,1,float16,float16,0,4.971397399902344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,24,1,128,1,float16,float16,0,2.146714687347412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,24,8,128,1,float16,float16,0,5.016709327697754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,fp8,0,0.43691198031107586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,fp8,0,0.6776853402455648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,fp8,0,12.345311482747396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,24,4,128,1,float16,float16,0,2.288149356842041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,24,24,128,1,float16,float16,0,1.56494935353597
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,24,2,128,1,float16,float16,0,2.114917278289795
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,fp8,0,1.6136159896850586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,fp8,0,0.06358399987220764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,24,8,128,1,float16,float16,0,2.2359414100646973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,fp8,0,0.12612799803415933
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,fp8,0,5.456399917602539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,24,2,128,1,float16,float16,0,1.0885333220163982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,fp8,0,0.28618133068084717
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,24,4,128,1,float16,float16,0,1.1346986293792725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,24,1,128,1,float16,float16,0,1.1578400135040283
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,fp8,0,2.582143942515055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,24,1,128,1,float16,float16,0,0.5753013292948405
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,fp8,0,0.7952480316162109
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,24,8,128,1,float16,float16,0,1.2082826296488445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,fp8,0,0.047050664822260536
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,24,24,128,1,float16,float16,0,0.7486186822255453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,fp8,0,0.07453333338101704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,24,2,128,1,float16,float16,0,0.574997345606486
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,24,4,128,1,float16,float16,0,0.5846133232116699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,fp8,0,0.16519999504089355
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,fp8,0,1.3715413411458333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,24,8,128,1,float16,float16,0,0.5825279951095581
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,fp8,0,0.4315359989802043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,fp8,0,2.060416062672933
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,fp8,0,2.4899892807006836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,fp8,0,3.691823959350586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,fp8,0,7.398997624715169
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,24,1,128,1,float16,float16,0,5.289600054423015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,24,24,128,1,float16,float16,0,6.839871724446614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,24,2,128,1,float16,float16,0,12.821210225423178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,fp8,0,1.1063946882883708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,24,1,128,1,float16,float16,0,11.822420756022135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,24,4,128,1,float16,float16,0,13.912373860677084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,24,8,128,1,float16,float16,0,14.07846450805664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,fp8,0,1.2337120374043782
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,fp8,0,1.8879200617472331
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,24,2,128,1,float16,float16,0,5.765770594278972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,fp8,0,14.505087534586588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,24,24,128,1,float16,float16,0,3.3768908182779946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,fp8,0,3.8313118616739907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,24,4,128,1,float16,float16,0,5.934922536214192
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,fp8,0,0.5477866729100546
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,24,8,128,1,float16,float16,0,6.170165379842122
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,fp8,0,0.6307093302408854
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,24,1,128,1,float16,float16,0,2.793813387552897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,24,2,128,1,float16,float16,0,2.5651680628458657
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,fp8,0,1.0092586676279705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,24,4,128,1,float16,float16,0,2.847301483154297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,24,24,128,1,float16,float16,0,1.6988639831542969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,fp8,0,1.984330654144287
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,24,8,128,1,float16,float16,0,3.0729173024495444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,fp8,0,6.77674674987793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,fp8,0,0.14070933063824972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,fp8,0,0.26258132855097455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,24,1,128,1,float16,float16,0,1.2987626393636067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,24,2,128,1,float16,float16,0,1.2972053686777751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,24,4,128,1,float16,float16,0,1.3670345942179363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,fp8,0,0.4686613480250041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,fp8,0,3.063119888305664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,fp8,0,0.9408000310262045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,fp8,0,0.05569066603978475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,24,1,128,1,float16,float16,0,0.6715946992238363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,24,2,128,1,float16,float16,0,0.6471413373947144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,24,24,128,1,float16,float16,0,0.8860800266265869
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,24,8,128,1,float16,float16,0,1.3641866048177083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,fp8,0,0.0805866668621699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,fp8,0,0.18667733669281006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,fp8,0,1.5397814114888508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,24,8,128,1,float16,float16,0,0.7442346413930258
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,24,4,128,1,float16,float16,0,0.7034880320231119
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,24,24,128,1,float16,float16,0,0.4277973175048828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,fp8,0,0.46223998069763184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,fp8,0,0.036805334190527596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,24,1,128,1,float16,float16,0,0.3466879924138387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,fp8,0,0.05760000149408976
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,24,2,128,1,float16,float16,0,0.35440532366434735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,fp8,0,0.769594669342041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,24,8,128,1,float16,float16,0,0.3582293192545573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,fp8,0,0.12343466281890869
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,24,4,128,1,float16,float16,0,0.3636480172475179
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,fp8,0,0.2641119956970215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,fp8,0,2.741845448811849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,fp8,0,4.614432017008464
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,fp8,0,3.1641600926717124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,fp8,0,7.990885416666667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,24,4,128,1,float16,float16,0,13.37619145711263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,fp8,0,1.393152077992757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,24,8,128,1,float16,float16,0,13.721519470214844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,24,24,128,1,float16,float16,0,7.079221089680989
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,24,1,128,1,float16,float16,0,12.703567504882812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,fp8,0,1.5881600379943848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,24,1,128,1,float16,float16,0,4.4610293706258135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,24,2,128,1,float16,float16,0,13.229867299397787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,fp8,0,2.309413274129232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,24,2,128,1,float16,float16,0,5.3267412185668945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,24,4,128,1,float16,float16,0,6.019194920857747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,24,8,128,1,float16,float16,0,5.587792078653972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,24,1,128,1,float16,float16,0,2.3752853075663247
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,fp8,0,0.6833226680755615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,24,24,128,1,float16,float16,0,3.614309310913086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,fp8,0,14.251920064290365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,fp8,0,4.147775967915853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,fp8,0,0.7857226530710856
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,24,2,128,1,float16,float16,0,2.417424043019613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,fp8,0,1.0997546513875325
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,24,24,128,1,float16,float16,0,1.7645920117696126
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,fp8,0,0.3229493300120036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,fp8,0,2.1364320119222007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,24,4,128,1,float16,float16,0,2.5182666778564453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,24,1,128,1,float16,float16,0,1.2069813410441081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,24,8,128,1,float16,float16,0,2.619472026824951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,fp8,0,0.36977601051330566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,fp8,0,0.5294453303019205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,24,4,128,1,float16,float16,0,1.2975893020629883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,24,2,128,1,float16,float16,0,1.2743253707885742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,24,8,128,1,float16,float16,0,1.3471840222676594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,fp8,0,3.0840800603230796
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,fp8,0,0.9800000190734863
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,24,24,128,1,float16,float16,0,0.8951733112335205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,24,2,128,1,float16,float16,0,0.6514826615651449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,fp8,0,0.047168001532554626
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,fp8,0,0.18089600404103598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,24,1,128,1,float16,float16,0,0.6216426690419515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,fp8,0,0.08532800277074178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,fp8,0,6.25169054667155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,24,4,128,1,float16,float16,0,0.643829345703125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,24,24,128,1,float16,float16,0,0.4458453257878621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,24,8,128,1,float16,float16,0,0.6963786284128824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,fp8,0,1.575642744700114
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,fp8,0,0.4931519826253255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,24,1,128,1,float16,float16,0,0.31866665681203205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,fp8,0,0.046709333856900535
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,fp8,0,0.03777066618204117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,24,4,128,1,float16,float16,0,0.3510719935099284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,24,2,128,1,float16,float16,0,0.3152906696001689
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,fp8,0,0.09940266609191895
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,24,8,128,1,float16,float16,0,0.3407253424326579
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,fp8,0,0.24501333634058634
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,24,1,128,1,float16,float16,0,0.1777120033899943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,fp8,0,0.8033013343811035
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,fp8,0,0.025802666942278545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,24,24,128,1,float16,float16,0,0.19608000914255777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,fp8,0,0.03705599904060364
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,24,2,128,1,float16,float16,0,0.18275733788808188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,24,4,128,1,float16,float16,0,0.18574933211008707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,fp8,0,0.4041759967803955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,24,8,128,1,float16,float16,0,0.18727999925613403
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,fp8,0,0.14114133516947427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,fp8,0,0.06145066519578298
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,fp8,0,2.065125306447347
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,fp8,0,3.251002629597982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,fp8,0,2.524463971455892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,24,1,128,1,float16,float16,0,6.256090799967448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,24,4,128,1,float16,float16,0,6.876218795776367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,24,8,128,1,float16,float16,0,6.981856028238933
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,24,2,128,1,float16,float16,0,6.878154754638672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,fp8,0,1.0396052996317546
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,24,24,128,1,float16,float16,0,4.631615956624349
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,24,1,128,1,float16,float16,0,2.6509013175964355
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,fp8,0,1.1672746340433757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,fp8,0,5.456677118937175
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,24,2,128,1,float16,float16,0,2.908341407775879
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,fp8,0,1.6399200757344563
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,24,4,128,1,float16,float16,0,3.0202932357788086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,fp8,0,0.5402506589889526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,24,24,128,1,float16,float16,0,2.337301254272461
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,fp8,0,2.742159843444824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,24,8,128,1,float16,float16,0,3.220032056172689
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,24,2,128,1,float16,float16,0,1.4363946914672852
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,fp8,0,3.8183838526407876
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,24,1,128,1,float16,float16,0,1.354848066965739
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,fp8,0,0.7525440057118734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,fp8,0,0.5964373350143433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,24,4,128,1,float16,float16,0,1.5396960576375325
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,24,24,128,1,float16,float16,0,1.1373759905497234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,24,1,128,1,float16,float16,0,0.7232159773508707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,24,8,128,1,float16,float16,0,1.5966614087422688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,fp8,0,1.346442699432373
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,fp8,0,0.15758933623631796
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,fp8,0,0.23555733760197958
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,24,4,128,1,float16,float16,0,0.7881279786427816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,fp8,0,0.34700798988342285
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,24,2,128,1,float16,float16,0,0.7526453336079916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,24,8,128,1,float16,float16,0,0.8245653311411539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,fp8,0,8.066869099934896
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,fp8,0,1.9764320055643718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,fp8,0,0.640720009803772
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,fp8,0,0.06883733471234639
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,24,2,128,1,float16,float16,0,0.36060798168182373
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,24,1,128,1,float16,float16,0,0.3520266612370809
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,fp8,0,0.03604800005753835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,24,24,128,1,float16,float16,0,0.5741333166758219
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,fp8,0,0.9649706681569418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,fp8,0,0.12387733658154805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,24,4,128,1,float16,float16,0,0.3898400068283081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,fp8,0,0.27533332506815594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,24,8,128,1,float16,float16,0,0.4354720115661621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,fp8,0,0.02646933247645696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,24,24,128,1,float16,float16,0,0.27696533997853595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,24,1,128,1,float16,float16,0,0.1949653426806132
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,24,2,128,1,float16,float16,0,0.20438933372497559
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,24,4,128,1,float16,float16,0,0.20260266462961832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,fp8,0,0.03682133307059606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,24,8,128,1,float16,float16,0,0.20261865854263306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,fp8,0,0.08855467041333516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,fp8,0,0.43653865655263263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,24,1,128,1,float16,float16,0,0.11036800344785054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,24,24,128,1,float16,float16,0,0.12015466888745625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,fp8,0,0.15225066741307577
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,fp8,0,0.021509334444999695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,24,2,128,1,float16,float16,0,0.11326932907104492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,fp8,0,0.031178665657838184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,fp8,0,0.04698666433493296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,24,4,128,1,float16,float16,0,0.11443199714024861
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,fp8,0,0.10847999652226765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,24,8,128,1,float16,float16,0,0.11843199531237285
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,fp8,0,0.2715946634610494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,fp8,0,2.7487147649129233
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,fp8,0,3.1229918797810874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,fp8,0,6.279674530029297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,24,8,128,1,float16,float16,0,6.953797022501628
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,24,1,128,1,float16,float16,0,6.5358931223551435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,24,2,128,1,float16,float16,0,5.789541244506836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,fp8,0,3.965504010518392
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,24,24,128,1,float16,float16,0,5.021429379781087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,24,1,128,1,float16,float16,0,2.756592114766439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,fp8,0,1.4689653714497883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,24,2,128,1,float16,float16,0,3.1226879755655923
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,fp8,0,1.5745919545491536
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,24,4,128,1,float16,float16,0,7.243946711222331
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,24,4,128,1,float16,float16,0,2.9571094512939453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,24,24,128,1,float16,float16,0,2.525791962941488
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,24,8,128,1,float16,float16,0,3.4370667139689126
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,fp8,0,2.11955197652181
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,fp8,0,3.3010133107503257
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,24,1,128,1,float16,float16,0,1.3262613614400227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,fp8,0,8.19976552327474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,fp8,0,0.7193493048350016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,24,2,128,1,float16,float16,0,1.392090638478597
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,fp8,0,4.275967915852864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,fp8,0,0.9560800393422445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,24,8,128,1,float16,float16,0,1.7047306696573894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,fp8,0,1.633285363515218
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,fp8,0,0.8074933687845866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,fp8,0,0.33752532800038654
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,24,4,128,1,float16,float16,0,1.540906588236491
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,24,24,128,1,float16,float16,0,1.2633386452992756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,fp8,0,2.10862398147583
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,24,2,128,1,float16,float16,0,0.738858699798584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,24,1,128,1,float16,float16,0,0.6920693715413412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,fp8,0,0.3702293237050374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,24,8,128,1,float16,float16,0,0.8830986817677816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,24,4,128,1,float16,float16,0,0.7694186369578043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,fp8,0,0.454528013865153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,fp8,0,0.7692053318023682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,24,1,128,1,float16,float16,0,0.3726079861323039
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,fp8,0,0.044010668992996216
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,24,24,128,1,float16,float16,0,0.6464746793111166
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,fp8,0,0.06238399942715963
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,24,2,128,1,float16,float16,0,0.38553067048390705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,fp8,0,0.13136000434557596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,fp8,0,1.0206720034281414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,24,4,128,1,float16,float16,0,0.4031039873758952
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,24,8,128,1,float16,float16,0,0.4511466821034749
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,fp8,0,0.33019200960795086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,24,24,128,1,float16,float16,0,0.31916799147923786
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,24,1,128,1,float16,float16,0,0.1767786741256714
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,fp8,0,0.02940266579389572
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,fp8,0,0.03666666646798452
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,24,2,128,1,float16,float16,0,0.18519467115402222
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,24,8,128,1,float16,float16,0,0.19376534223556519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,24,4,128,1,float16,float16,0,0.19113065799077353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,fp8,0,0.06995733578999837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,fp8,0,0.5023680130640665
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,24,1,128,1,float16,float16,0,0.09948266545931499
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,24,24,128,1,float16,float16,0,0.10840533177057902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,fp8,0,0.15152532855669656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,24,4,128,1,float16,float16,0,0.10545600454012553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,24,2,128,1,float16,float16,0,0.10097066561381023
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,fp8,0,0.022272000710169475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,fp8,0,0.04433066646258036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,fp8,0,0.02784000088771184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,fp8,0,0.2379093368848165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,fp8,0,0.08947199583053589
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,24,8,128,1,float16,float16,0,0.10643733541170756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,24,24,128,1,float16,float16,0,0.07075199981530507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,fp8,0,0.062447999914487205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,24,1,128,1,float16,float16,0,0.06084266801675161
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,fp8,0,0.06737066805362701
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,24,2,128,1,float16,float16,0,0.06443200012048085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,fp8,0,0.1680799921353658
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,24,8,128,1,float16,float16,0,0.06876266499360402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,24,4,128,1,float16,float16,0,0.07019199927647908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,fp8,0,0.08111999928951263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,fp8,0,0.10226133465766907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,fp8,0,2.7446505228678384
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,24,1,128,1,float16,float16,0,4.684954643249512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,fp8,0,3.1169439951578775
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,24,2,128,1,float16,float16,0,4.880319913228353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,fp8,0,3.9675893783569336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,24,24,128,1,float16,float16,0,4.830234527587891
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,24,4,128,1,float16,float16,0,5.424853642781575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,24,1,128,1,float16,float16,0,1.6589013735453289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,fp8,0,6.1106611887613935
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,24,8,128,1,float16,float16,0,6.222031911214192
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,fp8,0,1.658016045888265
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,fp8,0,1.3681599299112956
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,fp8,0,1.9786826769510906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,fp8,0,5.7043202718098955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,24,4,128,1,float16,float16,0,2.115562597910563
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,24,2,128,1,float16,float16,0,1.8078293800354004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,24,8,128,1,float16,float16,0,2.65993070602417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,fp8,0,0.8157227039337158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,24,1,128,1,float16,float16,0,0.8379093011220297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,24,2,128,1,float16,float16,0,0.91212264696757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,24,24,128,1,float16,float16,0,2.6217333475748696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,fp8,0,3.244197209676107
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,fp8,0,2.8381172815958657
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,24,24,128,1,float16,float16,0,1.212783972422282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,fp8,0,0.9490719636281332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,fp8,0,0.34676798184712726
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,fp8,0,1.5715467135111492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,24,8,128,1,float16,fp8,0,1.3946666717529297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,24,8,128,1,float16,float16,0,1.290826638539632
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,24,2,128,1,float16,float16,0,0.4657440185546875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,fp8,0,0.372106671333313
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,24,1,128,1,float16,float16,0,0.43133334318796795
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,24,24,128,1,float16,float16,0,0.6414293448130289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,24,4,128,1,float16,float16,0,0.5355253219604492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,fp8,0,0.662224014600118
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,24,8,128,1,float16,float16,0,0.6183093388875326
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,fp8,0,0.406879981358846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,fp8,0,0.6923893292744955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,24,4,128,1,float16,float16,0,1.0620960394541423
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,fp8,0,0.04073066761096319
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,fp8,0,0.05499733487764994
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,24,1,128,1,float16,float16,0,0.2301386594772339
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,fp8,0,0.6778506437937418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,24,4,128,1,float16,float16,0,0.27186665932337445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,fp8,0,0.10967466235160828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,24,8,128,1,float16,float16,0,0.3003466725349426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,24,24,128,1,float16,float16,0,0.25538132588068646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,24,2,128,1,float16,float16,0,0.24568533897399902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,24,1,128,1,float16,float16,0,0.1088853379090627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,fp8,0,0.2507573366165161
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,fp8,0,0.0259253333012263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,24,4,128,1,float16,float16,0,0.12119999527931213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,24,2,128,1,float16,float16,0,0.11211199561754863
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,fp8,0,0.033589333295822144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,fp8,0,0.10852799812952678
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,fp8,0,0.05436266462008158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,fp8,0,0.34794668356577557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,24,8,128,1,float16,float16,0,0.12383466958999634
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,24,1,128,1,float16,float16,0,0.06477866570154826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,24,24,128,1,float16,float16,0,0.07655466596285503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,fp8,0,0.1509066621462504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,24,2,128,1,float16,float16,0,0.06582933167616527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,fp8,0,0.024314666787783306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,24,4,128,1,float16,float16,0,0.07067733506361644
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,fp8,0,0.0345920001467069
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,24,8,128,1,float16,float16,0,0.07217066486676534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,fp8,0,0.059205333391825356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,fp8,0,0.05406933526198069
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,24,24,128,1,float16,float16,0,0.03933866570393244
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,24,2,128,1,float16,float16,0,0.03586133321126302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,fp8,0,0.06426666676998138
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,24,1,128,1,float16,float16,0,0.03495466709136963
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,24,4,128,1,float16,float16,0,0.037685332198937736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,fp8,0,0.0709440012772878
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,fp8,0,0.12244799733161926
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,24,1,128,1,float16,float16,0,0.02367999901374181
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,24,24,128,1,float16,float16,0,0.026208000878492992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,24,8,128,1,float16,float16,0,0.0377866675456365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,fp8,0,0.08447466293970744
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,fp8,0,0.03735466549793879
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,fp8,0,0.04026666780312856
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,24,4,128,1,float16,float16,0,0.024901332954565685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,24,2,128,1,float16,float16,0,0.02421333392461141
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,24,8,128,1,float16,float16,0,0.025045332809289295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,fp8,0,0.04240000247955322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,fp8,0,0.048570667703946434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,fp8,0,0.06911466519037883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,24,1,128,1,float16,float16,0,1.4245600700378418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,fp8,0,1.3776052792867024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,24,2,128,1,float16,float16,0,1.6856800715128581
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,fp8,0,1.6700426737467449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,fp8,0,2.021626631418864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,24,4,128,1,float16,float16,0,2.046677271525065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,24,24,128,1,float16,float16,0,2.427797317504883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,24,1,128,1,float16,float16,0,0.6537333329518636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,24,2,128,1,float16,float16,0,0.7423733075459799
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,fp8,0,0.677232027053833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,24,8,128,1,float16,float16,0,2.6500959396362305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,fp8,0,0.7528639634450277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,fp8,0,0.9586400190989176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,24,4,128,1,float16,float16,0,0.9805920124053955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,fp8,0,2.8776960372924805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,fp8,0,2.7502028147379556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,24,8,128,1,float16,float16,0,1.236944039662679
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,24,1,128,1,float16,float16,0,0.3224959969520569
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,fp8,0,0.3707786798477173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,fp8,0,1.4946346282958984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,fp8,0,1.2109013398488362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,fp8,0,0.34835731983184814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,fp8,0,0.40516801675160724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,24,24,128,1,float16,float16,0,0.5972213347752889
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,24,4,128,1,float16,float16,0,0.4400213162104289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,24,2,128,1,float16,float16,0,0.36580801010131836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,24,8,128,1,float16,float16,0,0.63264532883962
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,24,1,128,1,float16,float16,0,0.1572160025437673
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,fp8,0,0.04135466615358988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,fp8,0,0.5615893205006918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,24,2,128,1,float16,float16,0,0.17965867122014365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,fp8,0,0.05217599868774414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,fp8,0,0.09665600458780925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,24,24,128,1,float16,float16,0,0.22075200080871582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,24,1,128,1,float16,float16,0,0.07669333120187123
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,24,4,128,1,float16,float16,0,0.2095093329747518
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,fp8,0,0.6200000047683716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,24,24,128,1,float16,float16,0,1.2139093081156414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,24,8,128,1,float16,float16,0,0.28101332982381183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,fp8,0,0.02607999990383784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,24,2,128,1,float16,float16,0,0.08072000245253245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,fp8,0,0.23572266101837158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,fp8,0,0.030997333427270252
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,fp8,0,0.2657279968261719
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,fp8,0,0.049925332268079124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,24,1,128,1,float16,float16,0,0.04195733368396759
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,24,8,128,1,float16,float16,0,0.09334400296211243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,fp8,0,0.08426133791605632
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,fp8,0,0.01930133377512296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,24,2,128,1,float16,float16,0,0.04398400088151296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,fp8,0,0.020986666282018025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,24,4,128,1,float16,float16,0,0.08912000060081482
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,fp8,0,0.10586667060852051
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,24,4,128,1,float16,float16,0,0.04764799773693085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,24,24,128,1,float16,float16,0,0.05147733290990194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,fp8,0,0.03090133269627889
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,fp8,0,0.04140799989302953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,24,8,128,1,float16,float16,0,0.04840533435344696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,24,24,128,1,float16,float16,0,0.029504001140594482
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,24,1,128,1,float16,float16,0,0.02478400121132533
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,24,2,128,1,float16,float16,0,0.025605333348115284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,fp8,0,0.05940799911816915
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,fp8,0,0.06091199815273285
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,fp8,0,0.10233066479365031
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,24,4,128,1,float16,float16,0,0.027813332776228588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,fp8,0,0.06689600149790446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,24,8,128,1,float16,float16,0,0.02754666656255722
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,24,24,128,1,float16,float16,0,0.019248000035683315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,fp8,0,0.07416533430417378
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,24,1,128,1,float16,float16,0,0.016762666404247284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,24,2,128,1,float16,float16,0,0.016794666647911072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,fp8,0,0.05857066810131073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,fp8,0,0.03718933214743932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,fp8,0,0.03685333331425985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,24,4,128,1,float16,float16,0,0.017935999979575474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,fp8,0,0.04196799794832865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,24,1,128,1,float16,float16,0,0.011866666376590729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,fp8,0,0.04238399863243103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,24,8,128,1,float16,float16,0,0.017952000101407368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,24,24,128,1,float16,float16,0,0.012800000607967377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,fp8,0,0.03620799879233042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,24,2,128,1,float16,float16,0,0.011978667229413986
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,fp8,0,0.026208000878492992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,24,4,128,1,float16,float16,0,0.012063999970753988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,fp8,0,0.02613866577545802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,24,8,128,1,float16,float16,0,0.012186666329701742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,fp8,0,0.029322666426499683
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,fp8,0,0.029781334102153778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,24,2,128,1,float16,float16,0,0.7719360192616781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,24,1,128,1,float16,float16,0,0.6838346322377523
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,fp8,0,0.755845308303833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,24,4,128,1,float16,float16,0,0.9441920121510824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,fp8,0,0.9621439774831136
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,24,8,128,1,float16,float16,0,1.2784586747487385
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,fp8,0,0.729365348815918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,24,1,128,1,float16,float16,0,0.3365333477656047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,24,24,128,1,float16,float16,0,1.2123040358225505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,fp8,0,1.252351999282837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,fp8,0,0.3211573362350464
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,fp8,0,0.4352480173110962
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,fp8,0,1.4311680793762207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,24,2,128,1,float16,float16,0,0.37785065174102783
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,24,4,128,1,float16,float16,0,0.4424746831258138
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,fp8,0,0.3463733196258545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,24,24,128,1,float16,float16,0,0.5985813140869141
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,fp8,0,0.5130720138549805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,fp8,0,0.0410453329483668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,24,2,128,1,float16,float16,0,0.15992533167203268
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,fp8,0,0.05367999772230784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,24,8,128,1,float16,float16,0,0.2617493271827698
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,fp8,0,0.2153653303782145
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,24,1,128,1,float16,float16,0,0.13306132952372232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,24,8,128,1,float16,float16,0,0.6238933404286703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,24,24,128,1,float16,float16,0,0.21827733516693115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,fp8,0,0.6215146780014038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,24,1,128,1,float16,float16,0,0.06019733349482218
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,fp8,0,0.2195146679878235
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,fp8,0,0.025920001169045765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,fp8,0,0.046767999728520714
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,24,4,128,1,float16,float16,0,0.20957867304484049
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,fp8,0,0.09551466504732768
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,fp8,0,0.06753600140412648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,24,24,128,1,float16,float16,0,0.04253333310286204
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,fp8,0,0.031013332307338715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,24,2,128,1,float16,float16,0,0.06461866696675618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,fp8,0,0.08413333694140117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,24,1,128,1,float16,float16,0,0.03383466601371765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,24,4,128,1,float16,float16,0,0.07186133166154225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,fp8,0,0.01922133316596349
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,24,8,128,1,float16,float16,0,0.0738560010989507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,24,4,128,1,float16,float16,0,0.03978666663169861
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,fp8,0,0.021146667500336964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,24,8,128,1,float16,float16,0,0.04012266546487808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,fp8,0,0.02847466617822647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,24,2,128,1,float16,float16,0,0.03581333408753077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,fp8,0,0.038047999143600464
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,24,1,128,1,float16,float16,0,0.020026666422684986
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,24,2,128,1,float16,float16,0,0.02107733239730199
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,24,24,128,1,float16,float16,0,0.025018667181332905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,fp8,0,0.015696000307798386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,fp8,0,0.016762666404247284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,24,8,128,1,float16,float16,0,0.02313599983851115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,fp8,0,0.019744000087181728
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,24,4,128,1,float16,float16,0,0.023071999351183575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,fp8,0,0.023738667368888855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,24,1,128,1,float16,float16,0,0.014117332796255747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,fp8,0,0.014101333916187286
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,24,24,128,1,float16,float16,0,0.016336000214020412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,24,2,128,1,float16,float16,0,0.014165333161751429
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,fp8,0,0.04320000112056732
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,24,4,128,1,float16,float16,0,0.015072000523408255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,24,8,128,1,float16,float16,0,0.015018666783968607
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,fp8,0,0.015461333096027374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,fp8,0,0.01441066712141037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,fp8,0,0.019530666371186573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,fp8,0,0.013365333278973898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,24,1,128,1,float16,float16,0,0.009541333342591921
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,fp8,0,0.027535999814669292
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,24,2,128,1,float16,float16,0,0.009445333232482275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,fp8,0,0.013631999492645264
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,24,4,128,1,float16,float16,0,0.009685333197315535
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,fp8,0,0.018735999862353008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,fp8,0,0.013525333255529404
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,24,24,128,1,float16,float16,0,0.01020800011853377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,fp8,0,0.01687466725707054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,24,24,128,1,float16,float16,0,0.009290666629870733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,24,8,128,1,float16,float16,0,0.009642666826645533
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,24,1,128,1,float16,float16,0,0.008885333314538002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,fp8,0,0.017509333789348602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,24,2,128,1,float16,float16,0,0.008821333448092142
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,24,8,128,1,float16,float16,0,0.008949333180983862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,24,4,128,1,float16,float16,0,0.009119999905427298
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,fp8,0,0.01669866715868314
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,24,1,128,1,float16,float16,0,0.34140264987945557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,fp8,0,0.3479893207550049
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,24,2,128,1,float16,float16,0,0.3826826810836792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,24,4,128,1,float16,float16,0,0.44277334213256836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,fp8,0,0.32864532868067425
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,fp8,0,0.4079413414001465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,24,24,128,1,float16,float16,0,0.5931626558303833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,24,1,128,1,float16,float16,0,0.1384266714255015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,fp8,0,0.04124800115823746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,fp8,0,0.6226079861323038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,fp8,0,0.5053706566492716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,24,8,128,1,float16,float16,0,0.6627253293991089
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,fp8,0,0.05262400209903717
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,fp8,0,0.09326400359471639
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,24,2,128,1,float16,float16,0,0.1712053418159485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,24,1,128,1,float16,float16,0,0.05630399783452352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,24,8,128,1,float16,float16,0,0.2621120015780131
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,fp8,0,0.030741333961486816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,fp8,0,0.21336533625920615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,fp8,0,0.025936000049114227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,24,2,128,1,float16,float16,0,0.06039466460545858
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,24,24,128,1,float16,float16,0,0.23808000485102335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,24,4,128,1,float16,float16,0,0.06723199784755707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,24,4,128,1,float16,float16,0,0.20655999581019083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,fp8,0,0.04680533210436503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,24,8,128,1,float16,float16,0,0.06834133466084798
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,24,1,128,1,float16,float16,0,0.031888000667095184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,fp8,0,0.059263999263445534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,fp8,0,0.19314134120941162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,24,24,128,1,float16,float16,0,0.040549332896868386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,24,2,128,1,float16,float16,0,0.033674667278925575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,fp8,0,0.020928000410397846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,24,4,128,1,float16,float16,0,0.03738666574160258
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,24,24,128,1,float16,float16,0,0.022944000860055287
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,fp8,0,0.028399998943010967
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,fp8,0,0.032933334509531655
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,24,1,128,1,float16,float16,0,0.018437333405017853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,fp8,0,0.05872533222039541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,fp8,0,0.019205333044131596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,fp8,0,0.03628266602754593
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,24,2,128,1,float16,float16,0,0.019402666638294857
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,fp8,0,0.015578666081031164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,fp8,0,0.0164533331990242
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,fp8,0,0.019567999988794327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,24,4,128,1,float16,float16,0,0.02149333308140437
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,24,24,128,1,float16,float16,0,0.015578666081031164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,fp8,0,0.02111999938885371
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,24,8,128,1,float16,float16,0,0.03792533278465271
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,24,1,128,1,float16,float16,0,0.013381333400805792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,24,2,128,1,float16,float16,0,0.013429333766301474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,fp8,0,0.014053333550691605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,24,8,128,1,float16,float16,0,0.021525333325068157
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,24,4,128,1,float16,float16,0,0.014576000471909841
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,24,8,128,1,float16,float16,0,0.01444799949725469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,fp8,0,0.014096000542243322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,24,24,128,1,float16,float16,0,0.009882666791478792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,fp8,0,0.021221332252025604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,24,1,128,1,float16,float16,0,0.009039999917149544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,fp8,0,0.016074666132529575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,fp8,0,0.015376000354687372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,24,2,128,1,float16,float16,0,0.008992000172535578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,24,8,128,1,float16,float16,0,0.009381333366036415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,24,4,128,1,float16,float16,0,0.009322666873534521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,fp8,0,0.01422400027513504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,24,24,128,1,float16,float16,0,0.008879999940594038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,fp8,0,0.013466666142145792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,24,1,128,1,float16,float16,0,0.008602666358153025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,fp8,0,0.013445333888133367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,24,2,128,1,float16,float16,0,0.008682666967312494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,fp8,0,0.014981333166360855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,24,4,128,1,float16,float16,0,0.008709333216150602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,24,8,128,1,float16,float16,0,0.008879999940594038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,24,24,128,1,float16,float16,0,0.008586666857202848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,fp8,0,0.013690666606028875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,24,1,128,1,float16,float16,0,0.008352000266313553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,24,2,128,1,float16,float16,0,0.008352000266313553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,fp8,0,0.012543999900420507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,24,8,128,1,float16,float16,0,0.008458666503429413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,24,4,128,1,float16,float16,0,0.008597333605090777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,fp8,0,0.013503999759753546
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,fp8,0,0.01250133290886879
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,fp8,0,0.012773333738247553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,fp8,0,0.01258133351802826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,24,1,128,1,float16,float16,0,0.1981653372446696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,fp8,0,0.046570668617884316
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,fp8,0,0.06966933111349742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,fp8,0,0.15172266960144043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,24,2,128,1,float16,float16,0,0.21432000398635864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,24,4,128,1,float16,float16,0,0.24486400683720908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,24,8,128,1,float16,float16,0,0.30165332555770874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,24,24,128,1,float16,float16,0,0.23542400201161703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,24,1,128,1,float16,float16,0,0.09501866499582927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,fp8,0,0.26794666051864624
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,fp8,0,0.26129599412282306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,24,2,128,1,float16,float16,0,0.09852799773216248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,fp8,0,0.02998399982849757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,fp8,0,0.061706667145093284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,24,24,128,1,float16,float16,0,0.058335999647776283
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,fp8,0,0.03585066646337509
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,fp8,0,0.09741866588592529
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,fp8,0,0.0883893370628357
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,24,4,128,1,float16,float16,0,0.10576533277829488
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,24,1,128,1,float16,float16,0,0.04986133178075155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,24,8,128,1,float16,float16,0,0.10635733604431152
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,fp8,0,0.025029333929220837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,24,2,128,1,float16,float16,0,0.05195199946562449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,fp8,0,0.03350933392842611
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,fp8,0,0.023061332603295643
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,fp8,0,0.04772266745567322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,24,4,128,1,float16,float16,0,0.054976001381874084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,24,8,128,1,float16,float16,0,0.05570666491985321
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,24,24,128,1,float16,float16,0,0.03297066688537598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,fp8,0,0.05366933345794678
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,24,2,128,1,float16,float16,0,0.029477333029111225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,fp8,0,0.020400000115235645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,24,1,128,1,float16,float16,0,0.02868266652027766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,24,4,128,1,float16,float16,0,0.0310506671667099
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,24,24,128,1,float16,float16,0,0.01882133384545644
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,fp8,0,0.018511999398469925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,fp8,0,0.02385599911212921
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,24,1,128,1,float16,float16,0,0.01672533278663953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,24,8,128,1,float16,float16,0,0.03136533250411352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,fp8,0,0.02603733291228612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,24,2,128,1,float16,float16,0,0.016714667280515034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,24,4,128,1,float16,float16,0,0.017797333498795826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,fp8,0,0.035402665535608925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,fp8,0,0.017797333498795826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,fp8,0,0.019359999646743137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,24,1,128,1,float16,float16,0,0.012309333930412928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,24,8,128,1,float16,float16,0,0.01794133335351944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,24,24,128,1,float16,float16,0,0.013162666310866674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,fp8,0,0.020015999674797058
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,24,2,128,1,float16,float16,0,0.012495999534924826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,24,4,128,1,float16,float16,0,0.012639999389648438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,fp8,0,0.01775466650724411
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,fp8,0,0.01762666677435239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,24,8,128,1,float16,float16,0,0.012671999633312225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,fp8,0,0.019738666713237762
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,fp8,0,0.017162666966517765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,24,1,128,1,float16,float16,0,0.008447999755541483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,fp8,0,0.01746133342385292
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,fp8,0,0.017210666090250015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,24,24,128,1,float16,float16,0,0.008736000085870424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,24,2,128,1,float16,float16,0,0.008432000254591307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,24,4,128,1,float16,float16,0,0.008559999987483025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,fp8,0,0.017301333447297413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,24,8,128,1,float16,float16,0,0.0086666668454806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,fp8,0,0.017877332866191864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,fp8,0,0.017477333545684814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,fp8,0,0.016634666671355564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,24,1,128,1,float16,float16,0,0.008197333042820295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,24,24,128,1,float16,float16,0,0.008373333141207695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,24,2,128,1,float16,float16,0,0.008256000156203905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,24,4,128,1,float16,float16,0,0.008410666758815447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,24,8,128,1,float16,float16,0,0.008309333274761835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,24,24,128,1,float16,float16,0,0.008309333274761835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,24,1,128,1,float16,float16,0,0.008058666562040647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,24,2,128,1,float16,float16,0,0.008080000057816505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,fp8,0,0.01725333308180173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,fp8,0,0.016506666938463848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,24,4,128,1,float16,float16,0,0.00808533343176047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,24,8,128,1,float16,float16,0,0.008127999802430471
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,fp8,0,0.016597333053747814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,fp8,0,0.017322666943073273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,24,2,128,1,float16,float16,0,0.17561600605646768
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,24,1,128,1,float16,float16,0,0.17161067326863608
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,fp8,0,0.04900800188382467
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,fp8,0,0.03239466746648153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,24,4,128,1,float16,float16,0,0.18371200561523438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,24,8,128,1,float16,float16,0,0.18530666828155518
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,fp8,0,0.08712533116340637
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,24,24,128,1,float16,float16,0,0.09713600079218547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,24,1,128,1,float16,float16,0,0.08832533160845439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,fp8,0,0.1551146705945333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,fp8,0,0.023344000180562336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,fp8,0,0.027162666122118633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,24,2,128,1,float16,float16,0,0.09021866321563721
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,fp8,0,0.12755733728408813
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,fp8,0,0.04640000065167745
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,24,24,128,1,float16,float16,0,0.05096533397833506
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,24,4,128,1,float16,float16,0,0.09447999795277913
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,24,1,128,1,float16,float16,0,0.04638933142026266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,fp8,0,0.06903466582298279
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,24,8,128,1,float16,float16,0,0.09449600179990132
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,fp8,0,0.019167999426523846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,fp8,0,0.08341333270072937
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,24,4,128,1,float16,float16,0,0.04909333089987437
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,fp8,0,0.02082666630546252
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,24,2,128,1,float16,float16,0,0.0470773329337438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,fp8,0,0.025514667232831318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,24,8,128,1,float16,float16,0,0.04910400013128916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,24,1,128,1,float16,float16,0,0.027029333015282948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,fp8,0,0.03811733424663544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,24,24,128,1,float16,float16,0,0.029146666328112285
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,24,2,128,1,float16,float16,0,0.027034667630990345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,fp8,0,0.017642666896184284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,fp8,0,0.017632000148296356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,24,4,128,1,float16,float16,0,0.028090665737787884
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,24,8,128,1,float16,float16,0,0.02809600035349528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,fp8,0,0.019754666835069656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,24,1,128,1,float16,float16,0,0.015743999431530636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,24,24,128,1,float16,float16,0,0.016730666160583496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,fp8,0,0.046570668617884316
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,24,2,128,1,float16,float16,0,0.01591466615597407
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,fp8,0,0.021642667551835377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,fp8,0,0.01691199963291486
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,fp8,0,0.017562666287024815
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,24,4,128,1,float16,float16,0,0.01605333387851715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,fp8,0,0.03127466638882955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,24,1,128,1,float16,float16,0,0.01184533288081487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,fp8,0,0.017184000462293625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,fp8,0,0.017386666188637417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,fp8,0,0.01695466662446658
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,24,24,128,1,float16,float16,0,0.012122667084137598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,fp8,0,0.018677332748969395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,24,8,128,1,float16,float16,0,0.016656000167131424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,24,2,128,1,float16,float16,0,0.011829332758982977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,24,4,128,1,float16,float16,0,0.01192533348997434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,fp8,0,0.016602666427691776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,24,8,128,1,float16,float16,0,0.011887999872366587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,fp8,0,0.017338667064905167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,24,24,128,1,float16,float16,0,0.008352000266313553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,fp8,0,0.016917333006858826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,fp8,0,0.01674666628241539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,24,2,128,1,float16,float16,0,0.008117333054542542
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,24,1,128,1,float16,float16,0,0.008234666660428047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,24,4,128,1,float16,float16,0,0.008229333286484083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,fp8,0,0.017018667111794155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,24,8,128,1,float16,float16,0,0.008170666793982187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,fp8,0,0.017616000026464462
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,24,1,128,1,float16,float16,0,0.007903999959429106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,fp8,0,0.016864000509182613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,fp8,0,0.016410666207472484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,24,24,128,1,float16,float16,0,0.008058666562040647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,fp8,0,0.016879999389251072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,24,2,128,1,float16,float16,0,0.007978666573762894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,24,8,128,1,float16,float16,0,0.008037333066264788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,24,4,128,1,float16,float16,0,0.0081386665503184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,24,24,128,1,float16,float16,0,0.00814933329820633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,fp8,0,0.01720000058412552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,24,2,128,1,float16,float16,0,0.007893333211541176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,fp8,0,0.016197333733240765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,24,1,128,1,float16,float16,0,0.007791999727487564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,fp8,0,0.017045332739750545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,fp8,0,0.016645333419243496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,fp8,0,0.017018667111794155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,24,4,128,1,float16,float16,0,0.00795199970404307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,fp8,0,0.016943999876578648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,24,8,128,1,float16,float16,0,0.00814933329820633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,24,1,128,1,float16,float16,0,0.013594667116800943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,24,1,128,1,float16,fp8,0,0.01643199970324834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,24,2,128,1,float16,float16,0,0.02625600000222524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,0,0.022469334304332733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,24,8,128,1,float16,float16,0,0.06473066906134288
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,24,4,128,1,float16,float16,0,0.04029866556326548
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,0,0.03402666747570038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,0,0.05357866485913595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,24,24,128,1,float16,float16,0,0.08108800152937572
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,24,1,128,1,float16,float16,0,0.009872000043590864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,24,2,128,1,float16,float16,0,0.017722666263580322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,24,1,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,24,4,128,1,float16,float16,0,0.024826665719350178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,0,0.01523200049996376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,fp8,0,0.07210666437943776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,24,24,128,1,float16,float16,0,0.04268800218900045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,0,0.02107200026512146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,24,1,128,1,float16,float16,0,0.009370666618148485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,24,8,128,1,float16,float16,0,0.03890133400758108
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,fp8,0,0.04131733377774557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,0,0.03264000018437704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,24,1,128,1,float16,fp8,0,0.012341332932313284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,24,2,128,1,float16,float16,0,0.013232000172138214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,0,0.012416000167528788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,0,0.01452800010641416
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,24,8,128,1,float16,float16,0,0.02401600033044815
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,24,4,128,1,float16,float16,0,0.01718933383623759
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,24,24,128,1,float16,float16,0,0.025194667279720306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,0,0.020517333100239437
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,24,1,128,1,float16,float16,0,0.009103999783595404
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,24,1,128,1,float16,fp8,0,0.011813333878914515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,24,2,128,1,float16,float16,0,0.012752000242471695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,0,0.011792000383138657
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,24,4,128,1,float16,float16,0,0.012954667210578918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,0,0.01198400060335795
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,fp8,0,0.026517334083716076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,24,8,128,1,float16,float16,0,0.016613333175579708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,24,24,128,1,float16,float16,0,0.015109332899252573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,0,0.013850666582584381
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,24,1,128,1,float16,fp8,0,0.011535999675591787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,24,1,128,1,float16,float16,0,0.009114666531483332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,24,2,128,1,float16,float16,0,0.01251199965675672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,fp8,0,0.020400000115235645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,24,4,128,1,float16,float16,0,0.012736000120639801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,24,8,128,1,float16,float16,0,0.012831999609867731
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,24,24,128,1,float16,float16,0,0.011530666301647821
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,0,0.011578666667143503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,24,1,128,1,float16,float16,0,0.008879999940594038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,0,0.01349866638580958
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,24,2,128,1,float16,float16,0,0.012309333930412928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,24,4,128,1,float16,float16,0,0.012495999534924826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,24,1,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,fp8,0,0.014778666198253632
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,0,0.011365332951148352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,24,8,128,1,float16,float16,0,0.012629333883523941
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,24,1,128,1,float16,float16,0,0.008682666967312494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,24,24,128,1,float16,float16,0,0.007887999837597212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,24,1,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,0,0.013338666409254074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,24,2,128,1,float16,float16,0,0.01210133358836174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,24,4,128,1,float16,float16,0,0.012128000458081564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,24,8,128,1,float16,float16,0,0.0122079998254776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,0,0.011343999455372492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,24,24,128,1,float16,float16,0,0.008010666817426682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,fp8,0,0.014789332946141561
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,24,1,128,1,float16,float16,0,0.008576000109314919
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,24,2,128,1,float16,float16,0,0.012053333222866058
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,24,1,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,24,4,128,1,float16,float16,0,0.012128000458081564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,0,0.011247999966144562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,24,8,128,1,float16,float16,0,0.012047999848922094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,fp8,0,0.014266667266686758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,24,24,128,1,float16,float16,0,0.007887999837597212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,24,1,128,1,float16,float16,0,0.009573333586255709
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,0,0.011333333949247995
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,24,1,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,24,2,128,1,float16,float16,0,0.008485333373149237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,24,4,128,1,float16,float16,0,0.008725333337982496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,fp8,0,0.012543999900420507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,24,8,128,1,float16,float16,0,0.008592000231146812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,0,0.010826667149861654
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,fp8,0,2.704869270324707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,fp8,0,4.97432009379069
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,fp8,0,15.374373118082682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,16,16,128,1,float16,float16,0,28.09381357828776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,fp8,0,1.3879146575927734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,16,1,128,1,float16,float16,0,25.925323486328125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,fp8,0,2.7149225870768228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,16,1,128,1,float16,float16,0,53.29522705078125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,16,2,128,1,float16,float16,0,52.7349599202474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,16,4,128,1,float16,float16,0,53.066619873046875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,16,8,128,1,float16,float16,0,53.603302001953125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,16,2,128,1,float16,float16,0,26.6354497273763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,fp8,0,59.523966471354164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,fp8,0,7.801306406656901
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,fp8,0,0.7099040349324545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,fp8,0,56.42304484049479
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,16,16,128,1,float16,float16,0,13.597919464111328
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,fp8,0,1.5251146952311199
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,16,4,128,1,float16,float16,0,26.69231414794922
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,16,1,128,1,float16,float16,0,13.642042795817057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,fp8,0,3.711711883544922
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,16,2,128,1,float16,float16,0,14.079765319824219
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,16,8,128,1,float16,float16,0,28.10119883219401
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,16,4,128,1,float16,float16,0,13.980613708496094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,fp8,0,30.388870239257812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,16,16,128,1,float16,float16,0,7.1122080485026045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,fp8,0,0.26838932434717816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,fp8,0,0.7780106862386068
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,16,8,128,1,float16,float16,0,14.206367492675781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,16,1,128,1,float16,float16,0,6.610160191853841
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,fp8,0,2.320005257924398
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,fp8,0,31.04222361246745
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,16,2,128,1,float16,float16,0,6.5719254811604815
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,16,4,128,1,float16,float16,0,5.64404296875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,fp8,0,15.648981730143229
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,fp8,0,1.8996319770812988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,fp8,0,3.3128585815429688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,16,8,128,1,float16,float16,0,6.475002924601237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,fp8,0,15.321812947591146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,fp8,0,6.917642593383789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,fp8,0,9.27728017171224
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,16,16,128,1,float16,float16,0,16.30458704630534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,fp8,0,1.0273226896921794
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,16,1,128,1,float16,float16,0,15.3077761332194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,16,1,128,1,float16,float16,0,30.947540283203125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,16,2,128,1,float16,float16,0,30.64488983154297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,fp8,0,1.7900266647338867
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,16,4,128,1,float16,float16,0,30.70642598470052
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,16,8,128,1,float16,float16,0,30.754356384277344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,fp8,0,4.534618695576985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,16,2,128,1,float16,float16,0,15.1865603129069
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,fp8,0,33.31541442871094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,fp8,0,0.5326026678085327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,fp8,0,32.95843251546224
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,16,16,128,1,float16,float16,0,7.650400161743164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,fp8,0,1.012895981470744
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,16,4,128,1,float16,float16,0,15.264757792154947
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,16,8,128,1,float16,float16,0,15.626485188802084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,16,1,128,1,float16,float16,0,7.150218963623047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,16,2,128,1,float16,float16,0,6.950901031494141
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,fp8,0,2.254133383433024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,fp8,0,16.69667689005534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,fp8,0,0.18434667587280273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,16,16,128,1,float16,float16,0,3.754255930582682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,16,4,128,1,float16,float16,0,7.452480316162109
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,fp8,0,0.4471999804178874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,fp8,0,17.534688313802082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,16,1,128,1,float16,float16,0,3.116191864013672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,16,8,128,1,float16,float16,0,7.178880055745442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,fp8,0,1.2196160157521565
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,fp8,0,8.369839986165365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,16,2,128,1,float16,float16,0,3.1184425354003906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,16,4,128,1,float16,float16,0,3.470975875854492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,fp8,0,7.929381052652995
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,fp8,0,1.4944267272949219
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,16,8,128,1,float16,float16,0,3.7335519790649414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,fp8,0,3.9596001307169595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,fp8,0,2.469498634338379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,fp8,0,6.544890721638997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,16,16,128,1,float16,float16,0,11.166122436523438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,fp8,0,0.7964906692504883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,16,1,128,1,float16,float16,0,9.974405288696289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,16,1,128,1,float16,float16,0,22.22173817952474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,16,2,128,1,float16,float16,0,21.665562947591145
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,fp8,0,1.5198666254679363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,16,4,128,1,float16,float16,0,22.690592447916668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,16,8,128,1,float16,float16,0,22.020980834960938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,fp8,0,3.4509493509928384
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,16,2,128,1,float16,float16,0,9.493882497151693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,fp8,0,23.64417012532552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,fp8,0,25.506601969401043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,fp8,0,0.4082080125808716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,16,16,128,1,float16,float16,0,5.172608057657878
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,16,4,128,1,float16,float16,0,10.999397277832031
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,fp8,0,0.6834666728973389
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,16,1,128,1,float16,float16,0,4.693957328796387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,16,8,128,1,float16,float16,0,11.156239827473959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,16,2,128,1,float16,float16,0,4.762784004211426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,fp8,0,1.778447945912679
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,16,4,128,1,float16,float16,0,4.9783681233723955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,fp8,0,12.050464630126953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,fp8,0,0.14685866236686707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,16,16,128,1,float16,float16,0,2.5606560707092285
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,16,1,128,1,float16,float16,0,2.21236260732015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,fp8,0,0.3465760151545207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,16,8,128,1,float16,float16,0,4.395450592041016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,fp8,0,0.8684746424357096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,fp8,0,12.046991984049479
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,16,2,128,1,float16,float16,0,2.368565400441488
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,16,4,128,1,float16,float16,0,2.1890293757120767
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,fp8,0,5.67410151163737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,16,8,128,1,float16,float16,0,2.223456064860026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,fp8,0,5.540096282958984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,fp8,0,3.047429402669271
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,fp8,0,2.2785332997639975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,fp8,0,3.53547732035319
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,fp8,0,8.872165044148764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,16,16,128,1,float16,float16,0,15.272688547770182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,fp8,0,1.1528586546579997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,16,1,128,1,float16,float16,0,13.79198964436849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,16,1,128,1,float16,float16,0,28.65753682454427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,16,2,128,1,float16,float16,0,29.073023478190105
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,16,4,128,1,float16,float16,0,29.46880594889323
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,16,8,128,1,float16,float16,0,29.548207600911457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,fp8,0,1.9067573547363281
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,fp8,0,31.15259297688802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,16,2,128,1,float16,float16,0,14.111317952473959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,fp8,0,4.304986635843913
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,fp8,0,30.088063557942707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,fp8,0,0.601365327835083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,16,4,128,1,float16,float16,0,13.951279958089193
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,16,16,128,1,float16,float16,0,7.119530359903972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,16,1,128,1,float16,float16,0,6.432933171590169
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,16,2,128,1,float16,float16,0,6.497791926066081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,fp8,0,0.9448906580607096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,16,8,128,1,float16,float16,0,14.337679545084635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,fp8,0,2.2909812927246094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,fp8,0,15.178719838460287
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,fp8,0,0.1936639944712321
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,16,4,128,1,float16,float16,0,6.585856119791667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,16,1,128,1,float16,float16,0,2.7512585322062173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,16,16,128,1,float16,float16,0,3.364720026652018
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,fp8,0,15.07702891031901
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,16,8,128,1,float16,float16,0,6.97654914855957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,16,2,128,1,float16,float16,0,2.8091627756754556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,fp8,0,0.521562655766805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,fp8,0,1.160645325978597
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,fp8,0,7.408058802286784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,16,16,128,1,float16,float16,0,1.7700799306233723
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,16,4,128,1,float16,float16,0,2.86519463857015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,fp8,0,0.10544533530871074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,fp8,0,7.113536198933919
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,16,8,128,1,float16,float16,0,3.242543856302897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,fp8,0,0.28083733717600506
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,16,1,128,1,float16,float16,0,1.3582560221354167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,16,2,128,1,float16,float16,0,1.4244319597880046
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,fp8,0,3.694981257120768
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,fp8,0,0.7221600214640299
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,16,4,128,1,float16,float16,0,1.4425973892211914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,fp8,0,3.514693260192871
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,16,8,128,1,float16,float16,0,1.5531253814697266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,fp8,0,1.9140267372131348
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,fp8,0,1.6391679445902507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,fp8,0,2.415818691253662
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,fp8,0,5.274298667907715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,16,16,128,1,float16,float16,0,7.8167463938395185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,fp8,0,0.8239839871724447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,16,1,128,1,float16,float16,0,7.4728959401448565
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,16,1,128,1,float16,float16,0,15.943717956542969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,16,2,128,1,float16,float16,0,16.831743876139324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,fp8,0,1.3283572991689045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,16,4,128,1,float16,float16,0,17.394810994466145
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,16,8,128,1,float16,float16,0,17.103535970052082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,16,2,128,1,float16,float16,0,6.291818618774414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,fp8,0,2.734090805053711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,fp8,0,18.274005889892578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,fp8,0,17.650821685791016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,fp8,0,0.45713599522908527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,16,4,128,1,float16,float16,0,7.6583677927653
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,16,16,128,1,float16,float16,0,4.176282564798991
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,fp8,0,0.6375946601231893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,16,1,128,1,float16,float16,0,3.2616265614827475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,16,8,128,1,float16,float16,0,7.4613602956136065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,16,2,128,1,float16,float16,0,3.813151995340983
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,fp8,0,1.4349652926127117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,16,16,128,1,float16,float16,0,2.1008480389912925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,16,4,128,1,float16,float16,0,3.5684000651041665
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,16,1,128,1,float16,float16,0,1.6661814053853352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,fp8,0,9.99079449971517
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,fp8,0,0.12005333105723064
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,16,8,128,1,float16,float16,0,3.264058748881022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,fp8,0,0.2558186650276184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,fp8,0,8.69758415222168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,fp8,0,0.7910613218943278
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,fp8,0,4.035941441853841
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,16,2,128,1,float16,float16,0,1.8412693341573079
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,16,4,128,1,float16,float16,0,1.8703947067260742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,fp8,0,0.07780266801516215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,16,8,128,1,float16,float16,0,2.09989865620931
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,16,16,128,1,float16,float16,0,1.065717299779256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,16,1,128,1,float16,float16,0,1.014031966527303
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,fp8,0,5.026106516520183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,fp8,0,0.15684800346692404
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,fp8,0,2.02458127339681
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,16,2,128,1,float16,float16,0,0.8914079666137695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,fp8,0,0.4535520076751709
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,fp8,0,2.0350240071614585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,16,4,128,1,float16,float16,0,1.0710079669952393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,16,8,128,1,float16,float16,0,0.860853354136149
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,fp8,0,1.2748586336771648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,fp8,0,2.0797227223714194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,fp8,0,2.8509387969970703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,fp8,0,5.396565119425456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,16,16,128,1,float16,float16,0,7.737589518229167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,16,1,128,1,float16,float16,0,14.124735514322916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,16,2,128,1,float16,float16,0,15.551503499348959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,fp8,0,1.0344693660736084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,16,1,128,1,float16,float16,0,6.384090423583984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,16,4,128,1,float16,float16,0,15.623338063557943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,fp8,0,1.4526185989379883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,16,8,128,1,float16,float16,0,16.262752532958984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,fp8,0,17.87303415934245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,fp8,0,2.7076266606648765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,fp8,0,16.5688959757487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,16,2,128,1,float16,float16,0,6.7111358642578125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,16,4,128,1,float16,float16,0,6.0346934000651045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,fp8,0,0.5291946729024252
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,16,16,128,1,float16,float16,0,3.7714878718058267
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,16,8,128,1,float16,float16,0,7.529930750528972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,16,1,128,1,float16,float16,0,3.113792101542155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,fp8,0,0.7606720129648844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,16,2,128,1,float16,float16,0,3.3686561584472656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,fp8,0,1.4521279335021973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,fp8,0,8.445770899454752
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,16,4,128,1,float16,float16,0,3.261925379435221
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,fp8,0,7.096975962320964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,16,8,128,1,float16,float16,0,3.247941335042318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,fp8,0,0.1411946713924408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,16,16,128,1,float16,float16,0,2.045957406361898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,16,1,128,1,float16,float16,0,1.6694614092508953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,fp8,0,3.6745707194010415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,fp8,0,0.35395201047261554
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,fp8,0,0.6989813645680746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,fp8,0,3.5761067072550454
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,16,4,128,1,float16,float16,0,1.6063146591186523
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,16,16,128,1,float16,float16,0,0.9276053110758463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,16,2,128,1,float16,float16,0,1.5872000058492024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,fp8,0,0.064751997590065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,fp8,0,0.14587199687957764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,16,1,128,1,float16,float16,0,0.8687360286712646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,fp8,0,1.8853440284729004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,16,8,128,1,float16,float16,0,1.6007679303487141
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,16,2,128,1,float16,float16,0,0.759178638458252
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,fp8,0,1.83023468653361
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,16,4,128,1,float16,float16,0,0.7589279810587565
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,fp8,0,0.4063626527786255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,16,8,128,1,float16,float16,0,0.8749759991963705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,fp8,0,0.048767998814582825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,fp8,0,1.0070827007293701
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,16,2,128,1,float16,float16,0,0.4091786543528239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,16,16,128,1,float16,float16,0,0.4400959809621175
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,fp8,0,0.08183999856313069
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,16,1,128,1,float16,float16,0,0.41105600198109943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,fp8,0,0.9158133665720621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,16,4,128,1,float16,float16,0,0.4349759817123413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,fp8,0,0.21332800388336182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,16,8,128,1,float16,float16,0,0.42025601863861084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,fp8,0,0.623802661895752
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,fp8,0,2.0143893559773765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,fp8,0,1.512986660003662
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,fp8,0,3.650965372721354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,16,16,128,1,float16,float16,0,4.482794761657715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,16,4,128,1,float16,float16,0,8.095717112223307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,16,1,128,1,float16,float16,0,3.451984087626139
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,16,8,128,1,float16,float16,0,9.129285176595053
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,fp8,0,0.7530720233917236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,16,2,128,1,float16,float16,0,8.185717264811197
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,16,1,128,1,float16,float16,0,8.111050923665365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,fp8,0,9.777557373046875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,fp8,0,1.009504000345866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,16,2,128,1,float16,float16,0,3.8557812372843423
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,fp8,0,1.8160319328308105
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,16,4,128,1,float16,float16,0,3.8385705947875977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,fp8,0,0.3585386673609416
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,16,16,128,1,float16,float16,0,2.3509066899617515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,fp8,0,8.83239491780599
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,16,1,128,1,float16,float16,0,1.82259734471639
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,16,8,128,1,float16,float16,0,3.8888638814290366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,fp8,0,0.476639986038208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,fp8,0,4.539626757303874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,fp8,0,4.276485443115234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,16,2,128,1,float16,float16,0,1.9927253723144531
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,16,4,128,1,float16,float16,0,1.8252479235331218
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,16,16,128,1,float16,float16,0,1.1545173327128093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,fp8,0,0.09372799595197041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,fp8,0,1.01419202486674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,16,1,128,1,float16,float16,0,0.9855946699778239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,fp8,0,0.14216533303260803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,16,8,128,1,float16,float16,0,2.064527988433838
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,16,2,128,1,float16,float16,0,0.9661973317464193
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,fp8,0,2.2345120112101235
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,16,4,128,1,float16,float16,0,1.0035040378570557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,fp8,0,0.5206666787465414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,16,16,128,1,float16,float16,0,0.5729759931564331
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,fp8,0,2.1513493855794272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,fp8,0,0.05082133412361145
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,16,8,128,1,float16,float16,0,1.022005319595337
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,fp8,0,1.1608479817708333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,fp8,0,0.10988799730936687
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,fp8,0,1.1395200093587239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,fp8,0,0.2471146583557129
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,16,8,128,1,float16,float16,0,0.4824800093968709
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,16,1,128,1,float16,float16,0,0.48334399859110516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,16,4,128,1,float16,float16,0,0.5283946593602499
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,16,2,128,1,float16,float16,0,0.45508265495300293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,16,16,128,1,float16,float16,0,0.26150933901468915
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,fp8,0,0.5674453179041544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,fp8,0,0.041434665520985924
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,16,1,128,1,float16,float16,0,0.2537600000699361
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,fp8,0,0.06749866902828217
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,fp8,0,0.5812906821568807
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,fp8,0,0.16484799981117249
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,16,8,128,1,float16,float16,0,0.26075732707977295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,16,4,128,1,float16,float16,0,0.26613332827885944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,16,2,128,1,float16,float16,0,0.2632960081100464
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,fp8,0,0.41818666458129883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,fp8,0,1.9519519805908203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,fp8,0,2.4822559356689453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,fp8,0,4.077439943949382
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,16,1,128,1,float16,float16,0,8.090522766113281
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,16,4,128,1,float16,float16,0,8.163706461588541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,16,8,128,1,float16,float16,0,8.289183934529623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,16,2,128,1,float16,float16,0,7.483301162719727
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,fp8,0,1.0570826530456543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,16,1,128,1,float16,float16,0,3.07698663075765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,fp8,0,1.2771413326263428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,16,16,128,1,float16,float16,0,4.845002810160319
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,16,2,128,1,float16,float16,0,3.3103199005126953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,fp8,0,9.3221066792806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,16,16,128,1,float16,float16,0,2.3569067319234214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,fp8,0,9.000368118286133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,16,4,128,1,float16,float16,0,3.4752960205078125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,16,8,128,1,float16,float16,0,3.8415466944376626
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,fp8,0,2.042970657348633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,16,1,128,1,float16,float16,0,1.634885311126709
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,fp8,0,0.46754666169484455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,fp8,0,4.601807912190755
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,fp8,0,0.5972213347752889
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,16,4,128,1,float16,float16,0,1.7600213686625164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,fp8,0,0.9964479605356852
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,16,2,128,1,float16,float16,0,1.6017333666483562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,16,16,128,1,float16,float16,0,1.2081759770711262
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,fp8,0,4.164762814839681
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,16,8,128,1,float16,float16,0,2.083402633666992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,fp8,0,2.3673173586527505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,fp8,0,0.1048906644185384
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,16,1,128,1,float16,float16,0,0.8184746901194254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,fp8,0,0.261845330397288
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,16,4,128,1,float16,float16,0,1.0217546621958415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,fp8,0,0.4869493246078491
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,16,16,128,1,float16,float16,0,0.5862506628036499
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,fp8,0,0.045994664231936135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,16,2,128,1,float16,float16,0,0.8458453019460043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,16,1,128,1,float16,float16,0,0.44651734828948975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,fp8,0,1.1214346885681152
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,fp8,0,2.177061398824056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,16,8,128,1,float16,float16,0,0.9722186724344889
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,fp8,0,1.0059999624888103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,fp8,0,0.081535999973615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,16,2,128,1,float16,float16,0,0.41462401549021405
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,fp8,0,0.22446932395299277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,16,8,128,1,float16,float16,0,0.48579200108846027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,16,1,128,1,float16,float16,0,0.22542399168014526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,16,16,128,1,float16,float16,0,0.26336532831192017
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,fp8,0,0.032885332902272545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,fp8,0,0.5862826506296793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,16,2,128,1,float16,float16,0,0.23809067408243814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,fp8,0,0.05147199829419454
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,16,4,128,1,float16,float16,0,0.4445493221282959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,fp8,0,0.4951466719309489
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,16,4,128,1,float16,float16,0,0.23726399739583334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,16,8,128,1,float16,float16,0,0.2427306572596232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,fp8,0,0.02754666656255722
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,fp8,0,0.11894933382670085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,16,1,128,1,float16,float16,0,0.12863999605178833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,16,2,128,1,float16,float16,0,0.1332266628742218
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,16,16,128,1,float16,float16,0,0.13944000005722046
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,fp8,0,0.32391999165217084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,fp8,0,0.3103040059407552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,16,4,128,1,float16,float16,0,0.1366933286190033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,16,8,128,1,float16,float16,0,0.13578133781750998
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,fp8,0,0.0765173335870107
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,fp8,0,0.04233066737651825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,fp8,0,0.18743467330932617
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,fp8,0,1.4654827117919922
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,fp8,0,2.7880798975626626
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,fp8,0,1.7947786649068196
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,16,4,128,1,float16,float16,0,4.322400093078613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,16,8,128,1,float16,float16,0,4.740106582641602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,16,1,128,1,float16,float16,0,3.928288141886393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,16,2,128,1,float16,float16,0,3.8632799784342446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,fp8,0,5.9351145426432295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,fp8,0,0.725488026936849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,16,16,128,1,float16,float16,0,3.1335252126057944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,fp8,0,0.9091253280639648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,16,1,128,1,float16,float16,0,1.8298560778299968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,fp8,0,1.3747199376424153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,16,2,128,1,float16,float16,0,1.999381383260091
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,16,4,128,1,float16,float16,0,2.1731093724568686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,fp8,0,5.0453494389851885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,16,8,128,1,float16,float16,0,2.3570292790730796
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,16,16,128,1,float16,float16,0,1.518938700358073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,16,1,128,1,float16,float16,0,0.9305919806162516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,fp8,0,2.9570560455322266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,fp8,0,0.34324800968170166
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,16,4,128,1,float16,float16,0,1.1154613494873047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,fp8,0,0.4522293408711751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,fp8,0,2.596245288848877
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,16,2,128,1,float16,float16,0,0.9551306565602621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,fp8,0,0.6809386412302653
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,16,8,128,1,float16,float16,0,1.178261359532674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,16,16,128,1,float16,float16,0,0.7400960127512614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,fp8,0,0.06353599826494853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,fp8,0,1.44813871383667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,16,2,128,1,float16,float16,0,0.50164266427358
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,fp8,0,0.11410133043924968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,16,4,128,1,float16,float16,0,0.5557119846343994
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,fp8,0,0.30538666248321533
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,16,8,128,1,float16,float16,0,0.6049813429514567
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,16,1,128,1,float16,float16,0,0.49566932519276935
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,fp8,0,1.2822986443837483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,fp8,0,0.03536533315976461
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,16,1,128,1,float16,float16,0,0.2510133385658264
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,16,16,128,1,float16,float16,0,0.3794879913330078
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,16,2,128,1,float16,float16,0,0.2576106588045756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,fp8,0,0.6833279927571615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,fp8,0,0.06523199876149495
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,16,4,128,1,float16,float16,0,0.269050657749176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,16,8,128,1,float16,float16,0,0.28600533803304035
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,fp8,0,0.14511467019716898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,fp8,0,0.6489973465601603
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,16,16,128,1,float16,float16,0,0.15416533748308817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,16,1,128,1,float16,float16,0,0.14588266611099243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,fp8,0,0.026848000784715016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,16,2,128,1,float16,float16,0,0.1455626686414083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,16,4,128,1,float16,float16,0,0.15314132968584696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,fp8,0,0.32686932881673175
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,fp8,0,0.040149333576361336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,fp8,0,0.09371200203895569
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,16,8,128,1,float16,float16,0,0.15466133753458658
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,16,16,128,1,float16,float16,0,0.09220799803733826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,fp8,0,0.20267200469970703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,fp8,0,0.325381338596344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,fp8,0,0.022570667167504627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,16,1,128,1,float16,float16,0,0.0872373382250468
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,16,2,128,1,float16,float16,0,0.08920533458391826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,fp8,0,0.0346666673819224
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,fp8,0,0.20107199748357138
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,16,4,128,1,float16,float16,0,0.09295466542243958
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,16,8,128,1,float16,float16,0,0.09224533041318257
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,fp8,0,0.05934933324654897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,fp8,0,0.14844266573588052
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,fp8,0,1.9551413853963215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,16,1,128,1,float16,float16,0,3.8630507787068686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,fp8,0,2.3555307388305664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,16,8,128,1,float16,float16,0,5.131066640218099
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,16,2,128,1,float16,float16,0,3.792869249979655
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,fp8,0,0.98471466700236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,16,1,128,1,float16,float16,0,1.7890666325887044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,fp8,0,3.3044907251993814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,fp8,0,1.1621013482411702
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,16,4,128,1,float16,float16,0,4.457903861999512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,16,16,128,1,float16,float16,0,3.355274518330892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,fp8,0,5.487226486206055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,fp8,0,6.411312103271484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,16,2,128,1,float16,float16,0,1.8848053614298503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,16,4,128,1,float16,float16,0,2.125802675882975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,16,1,128,1,float16,float16,0,0.9463733037312826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,fp8,0,0.457258661588033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,fp8,0,2.7844107945760093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,16,8,128,1,float16,float16,0,2.547551949818929
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,fp8,0,0.5385973453521729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,fp8,0,1.656549294789632
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,16,16,128,1,float16,float16,0,1.6829652786254883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,16,2,128,1,float16,float16,0,0.9522026379903158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,16,4,128,1,float16,float16,0,1.0999840100606282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,16,8,128,1,float16,float16,0,1.2607946395874023
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,16,1,128,1,float16,float16,0,0.47780799865722656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,fp8,0,0.8082293669382731
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,fp8,0,1.5861973762512207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,fp8,0,0.08358933528264363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,fp8,0,0.209824005762736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,fp8,0,1.3676907221476238
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,16,2,128,1,float16,float16,0,0.49777066707611084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,fp8,0,3.2455307642618814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,16,8,128,1,float16,float16,0,0.6213279962539673
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,fp8,0,0.35019199053446454
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,16,4,128,1,float16,float16,0,0.5583200057347616
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,16,16,128,1,float16,float16,0,0.42690666516621906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,fp8,0,0.7351306279500326
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,fp8,0,0.6613440116246542
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,16,1,128,1,float16,float16,0,0.23915199438730875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,16,16,128,1,float16,float16,0,0.8436266581217448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,fp8,0,0.03625066578388214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,fp8,0,0.06162666777769724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,16,2,128,1,float16,float16,0,0.24356265862782797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,16,8,128,1,float16,float16,0,0.314736008644104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,16,4,128,1,float16,float16,0,0.2756906747817993
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,fp8,0,0.14612799882888794
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,16,16,128,1,float16,float16,0,0.16249066591262817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,16,1,128,1,float16,float16,0,0.1267253359158834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,fp8,0,0.0258240004380544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,16,2,128,1,float16,float16,0,0.13352533181508383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,fp8,0,0.03651199986537298
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,16,4,128,1,float16,float16,0,0.14035200079282126
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,16,8,128,1,float16,float16,0,0.14632532993952432
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,fp8,0,0.367520014444987
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,fp8,0,0.29253333806991577
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,16,16,128,1,float16,float16,0,0.08545600374539693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,fp8,0,0.07930666704972585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,fp8,0,0.18363199631373087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,16,1,128,1,float16,float16,0,0.07982933521270752
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,fp8,0,0.02081599955757459
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,16,4,128,1,float16,float16,0,0.084906667470932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,fp8,0,0.18371733029683432
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,16,2,128,1,float16,float16,0,0.08126399914423625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,fp8,0,0.04935466746489207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,16,8,128,1,float16,float16,0,0.08518399794896443
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,fp8,0,0.029535998900731403
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,16,16,128,1,float16,float16,0,0.0472320020198822
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,fp8,0,0.11641066273053487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,fp8,0,0.047423998514811196
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,16,2,128,1,float16,float16,0,0.0439573327700297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,16,1,128,1,float16,float16,0,0.04387199878692627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,16,4,128,1,float16,float16,0,0.04619200030962626
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,fp8,0,0.05545066793759664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,fp8,0,0.12310933073361714
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,fp8,0,0.07261866827805837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,16,8,128,1,float16,float16,0,0.046282668908437095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,fp8,0,0.09079999725023906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,fp8,0,1.9508426984151204
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,16,1,128,1,float16,float16,0,3.187866528828939
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,fp8,0,2.33350404103597
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,16,2,128,1,float16,float16,0,3.374058723449707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,16,4,128,1,float16,float16,0,3.8989388147989907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,fp8,0,3.169877370198568
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,16,1,128,1,float16,float16,0,1.1368213494618733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,16,16,128,1,float16,float16,0,3.208197275797526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,16,8,128,1,float16,float16,0,4.739269256591797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,fp8,0,0.9596532980600992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,16,4,128,1,float16,float16,0,1.588304042816162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,fp8,0,1.1530720392862956
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,fp8,0,4.8645172119140625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,fp8,0,4.163397471110026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,16,2,128,1,float16,float16,0,1.2845919926961262
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,fp8,0,0.4921226501464844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,16,2,128,1,float16,float16,0,0.6506133476893107
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,fp8,0,1.5510719617207844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,fp8,0,2.062725385030111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,16,16,128,1,float16,float16,0,1.6119626363118489
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,16,8,128,1,float16,float16,0,2.1155573527018228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,fp8,0,0.5628159840901693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,16,4,128,1,float16,float16,0,0.8063146273295084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,fp8,0,2.457690715789795
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,fp8,0,0.9631626605987549
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,16,1,128,1,float16,float16,0,0.31014933188756305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,16,1,128,1,float16,float16,0,0.5757013161977133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,fp8,0,0.769589344660441
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,16,2,128,1,float16,float16,0,0.33315199613571167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,fp8,0,0.18514132499694824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,16,8,128,1,float16,float16,0,1.0159893035888672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,16,4,128,1,float16,float16,0,0.39526931444803876
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,16,16,128,1,float16,float16,0,0.8098613421122233
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,fp8,0,1.2759413719177246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,16,1,128,1,float16,float16,0,0.14145066340764365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,fp8,0,0.2913706700007121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,fp8,0,0.532474676767985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,16,2,128,1,float16,float16,0,0.15708266695340475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,fp8,0,0.45289067427317303
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,16,8,128,1,float16,float16,0,0.485370675722758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,fp8,0,0.049733335773150124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,fp8,0,0.11364266276359558
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,16,16,128,1,float16,float16,0,0.10704533259073894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,16,16,128,1,float16,float16,0,0.3648800055185954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,16,8,128,1,float16,float16,0,0.22566932439804077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,fp8,0,0.24888533353805542
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,16,4,128,1,float16,float16,0,0.19197867314020792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,fp8,0,0.07595199843247731
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,fp8,0,0.18731200695037842
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,fp8,0,0.022453332940737408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,16,1,128,1,float16,float16,0,0.08037333190441132
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,16,2,128,1,float16,float16,0,0.08265066643555959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,16,4,128,1,float16,float16,0,0.09371200203895569
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,fp8,0,0.033071999748547874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,fp8,0,0.030506665507952373
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,fp8,0,0.05696000158786774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,16,16,128,1,float16,float16,0,0.052442664901415505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,16,1,128,1,float16,float16,0,0.04400533437728882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,16,8,128,1,float16,float16,0,0.09191999832789104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,fp8,0,0.12226133545239766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,fp8,0,0.11807466546694438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,fp8,0,0.017525333911180496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,16,2,128,1,float16,float16,0,0.04654933512210846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,fp8,0,0.022783999641736347
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,fp8,0,0.03626666714747747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,16,16,128,1,float16,float16,0,0.030938667555650074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,16,8,128,1,float16,float16,0,0.0507893313964208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,16,4,128,1,float16,float16,0,0.05150933563709259
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,fp8,0,0.06343466540177663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,16,2,128,1,float16,float16,0,0.028090665737787884
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,16,4,128,1,float16,float16,0,0.03014933317899704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,fp8,0,0.045109331607818604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,fp8,0,0.04914666712284088
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,fp8,0,0.09204799930254619
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,16,8,128,1,float16,float16,0,0.03031466652949651
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,16,1,128,1,float16,float16,0,0.02752000093460083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,fp8,0,0.059018666545550026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,16,16,128,1,float16,float16,0,0.01878400022784869
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,16,1,128,1,float16,float16,0,0.017130666722853977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,fp8,0,0.06946666538715363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,16,2,128,1,float16,float16,0,0.017258666455745697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,fp8,0,0.029450667401154835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,16,4,128,1,float16,float16,0,0.01850133389234543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,fp8,0,0.03268266717592875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,16,8,128,1,float16,float16,0,0.018613333503405254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,fp8,0,0.03454400102297465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,fp8,0,0.050901333491007485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,fp8,0,0.04078399886687597
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,16,1,128,1,float16,float16,0,0.9499626954396566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,fp8,0,0.9610613187154134
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,fp8,0,1.1578400135040283
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,16,2,128,1,float16,float16,0,1.1903253396352131
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,fp8,0,1.5830292701721191
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,16,16,128,1,float16,float16,0,1.6215039889017742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,16,4,128,1,float16,float16,0,1.5552959442138672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,fp8,0,1.6705973943074544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,16,1,128,1,float16,float16,0,0.43748267491658527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,fp8,0,0.46056000391642254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,16,8,128,1,float16,float16,0,2.1493919690450034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,16,8,128,1,float16,float16,0,1.0206879774729412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,16,4,128,1,float16,float16,0,0.6968106428782145
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,fp8,0,1.2686293125152588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,16,2,128,1,float16,float16,0,0.5223093430201212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,fp8,0,0.7249546845753988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,fp8,0,0.5258453289667765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,16,16,128,1,float16,float16,0,0.8018346627553304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,fp8,0,0.759002685546875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,16,1,128,1,float16,float16,0,0.2219466765721639
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,16,2,128,1,float16,float16,0,0.2510400017102559
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,16,4,128,1,float16,float16,0,0.33081066608428955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,fp8,0,2.483818689982096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,fp8,0,0.18896534045537314
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,fp8,0,0.3461546500523885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,16,16,128,1,float16,float16,0,0.3938346703847249
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,16,8,128,1,float16,float16,0,0.4942026535669963
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,fp8,0,0.07546666761239369
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,fp8,0,0.2826293309529622
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,fp8,0,0.032885332902272545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,fp8,0,0.46342933177948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,fp8,0,0.09091732899347942
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,16,2,128,1,float16,float16,0,0.11066133777300517
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,fp8,0,0.04479999840259552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,16,8,128,1,float16,float16,0,0.18425067265828451
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,16,16,128,1,float16,float16,0,0.08699199557304382
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,16,4,128,1,float16,float16,0,0.13477333386739096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,16,1,128,1,float16,float16,0,0.05529599885145823
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,fp8,0,0.18850666284561157
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,fp8,0,0.022826666633288067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,fp8,0,0.13293332854906717
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,16,2,128,1,float16,float16,0,0.059152002135912575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,fp8,0,0.04679466784000397
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,fp8,0,0.027466667195161183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,16,4,128,1,float16,float16,0,0.06730666756629944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,fp8,0,0.08933333555857341
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,16,8,128,1,float16,float16,0,0.06798399984836578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,16,1,128,1,float16,float16,0,0.031727999448776245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,fp8,0,0.08295466502507527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,fp8,0,0.017610666652520496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,16,2,128,1,float16,float16,0,0.03363733241955439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,16,16,128,1,float16,float16,0,0.03877866764863332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,fp8,0,0.01953599974513054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,16,8,128,1,float16,float16,0,0.0377866675456365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,fp8,0,0.029802667597929638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,16,16,128,1,float16,float16,0,0.023205332458019257
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,16,4,128,1,float16,float16,0,0.03751466671625773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,fp8,0,0.04334400097529093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,16,2,128,1,float16,float16,0,0.020608000457286835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,fp8,0,0.04472533365090688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,fp8,0,0.04598933458328247
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,16,1,128,1,float16,float16,0,0.019808000574509304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,16,4,128,1,float16,float16,0,0.022463999688625336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,fp8,0,0.07212266822655995
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,fp8,0,0.05203199883302053
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,16,8,128,1,float16,float16,0,0.02256533255179723
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,16,16,128,1,float16,float16,0,0.01392000044385592
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,fp8,0,0.058431997895240784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,16,1,128,1,float16,float16,0,0.01249066616098086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,16,2,128,1,float16,float16,0,0.012442667037248611
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,16,4,128,1,float16,float16,0,0.01350933313369751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,fp8,0,0.029301332930723827
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,fp8,0,0.02942933390537898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,16,1,128,1,float16,float16,0,0.10134399930636089
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,fp8,0,0.041109333435694374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,fp8,0,0.03408000121514002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,16,16,128,1,float16,float16,0,0.012042666474978128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,16,8,128,1,float16,float16,0,0.013440000514189402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,fp8,0,0.03509866694609324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,16,1,128,1,float16,float16,0,0.011594666788975397
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,fp8,0,0.02186133215824763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,fp8,0,0.026320000489552815
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,16,4,128,1,float16,float16,0,0.01173866664369901
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,fp8,0,0.021925332645575207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,16,2,128,1,float16,float16,0,0.011717333147923151
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,16,8,128,1,float16,float16,0,0.011861333002646765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,fp8,0,0.025221332907676697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,fp8,0,0.02565866708755493
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,16,1,128,1,float16,float16,0,0.4519733190536499
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,fp8,0,0.4586506684621175
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,16,2,128,1,float16,float16,0,0.5686560074488322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,fp8,0,0.5258293151855469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,fp8,0,0.7237813472747803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,16,16,128,1,float16,float16,0,0.8105599880218506
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,16,4,128,1,float16,float16,0,0.7085920174916586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,16,8,128,1,float16,float16,0,1.0412373542785645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,fp8,0,0.7007199923197428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,16,1,128,1,float16,float16,0,0.21688000361124674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,fp8,0,1.190341313680013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,fp8,0,0.07549333572387695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,fp8,0,0.1999680002530416
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,16,8,128,1,float16,float16,0,0.5050026575724283
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,fp8,0,0.27558932701746625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,16,2,128,1,float16,float16,0,0.24238399664560953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,16,16,128,1,float16,float16,0,0.3658613363901774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,fp8,0,0.45255998770395917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,16,4,128,1,float16,float16,0,0.32837865749994916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,fp8,0,0.3024853269259135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,16,1,128,1,float16,float16,0,0.07965866724650066
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,16,2,128,1,float16,float16,0,0.08990933497746785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,fp8,0,0.033045334120591484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,fp8,0,0.04466133316357931
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,16,16,128,1,float16,float16,0,0.07256533205509186
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,fp8,0,0.08061333497365315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,16,1,128,1,float16,float16,0,0.043663998444875084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,16,4,128,1,float16,float16,0,0.10844266414642334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,fp8,0,0.10198400417963664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,fp8,0,0.17066667477289835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,16,8,128,1,float16,float16,0,0.1752799948056539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,fp8,0,0.022682666778564453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,16,2,128,1,float16,float16,0,0.04783466458320618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,fp8,0,0.02743999908367793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,16,4,128,1,float16,float16,0,0.05500266452630361
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,16,16,128,1,float16,float16,0,0.03242133309443792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,fp8,0,0.04357333481311798
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,16,1,128,1,float16,float16,0,0.025626666843891144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,16,8,128,1,float16,float16,0,0.05596266686916351
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,fp8,0,0.017642666896184284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,fp8,0,0.06343466540177663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,fp8,0,0.054383998115857445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,16,2,128,1,float16,float16,0,0.02739199995994568
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,16,4,128,1,float16,float16,0,0.031008000175158184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,fp8,0,0.01945066700379054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,fp8,0,0.026778665681680042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,16,8,128,1,float16,float16,0,0.03166933357715607
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,16,1,128,1,float16,float16,0,0.015194666882356008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,fp8,0,0.030506665507952373
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,fp8,0,0.036320000886917114
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,16,2,128,1,float16,float16,0,0.01598400001724561
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,16,16,128,1,float16,float16,0,0.019023999571800232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,16,4,128,1,float16,float16,0,0.018288000176350277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,fp8,0,0.01580799991885821
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,16,8,128,1,float16,float16,0,0.018506667266289394
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,16,16,128,1,float16,float16,0,0.011477333803971609
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,fp8,0,0.018911999960740406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,fp8,0,0.02275199939807256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,16,1,128,1,float16,float16,0,0.010122666756312052
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,16,2,128,1,float16,float16,0,0.009952000031868616
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,16,4,128,1,float16,float16,0,0.01101333275437355
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,fp8,0,0.013658666362365087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,fp8,0,0.01379199946920077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,16,8,128,1,float16,float16,0,0.011125333607196808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,fp8,0,0.020026666422684986
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,16,1,128,1,float16,float16,0,0.009039999917149544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,fp8,0,0.01309866706530253
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,fp8,0,0.018895999838908512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,16,16,128,1,float16,float16,0,0.009663999701539675
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,16,2,128,1,float16,float16,0,0.009162666896979014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,fp8,0,0.017450666675964992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,fp8,0,0.013445333888133367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,16,4,128,1,float16,float16,0,0.009242666885256767
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,fp8,0,0.02144533395767212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,16,16,128,1,float16,float16,0,0.008890666688481966
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,16,1,128,1,float16,float16,0,0.008565333361426989
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,16,2,128,1,float16,float16,0,0.008602666358153025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,16,8,128,1,float16,float16,0,0.009397333487868309
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,fp8,0,0.016415999581416447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,16,4,128,1,float16,float16,0,0.008767999708652496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,16,8,128,1,float16,float16,0,0.008752000207702318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,fp8,0,0.016341333587964375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,fp8,0,0.08206399778525035
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,16,1,128,1,float16,float16,0,0.2164693276087443
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,16,2,128,1,float16,float16,0,0.24116800228754678
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,fp8,0,0.19208000103632608
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,fp8,0,0.27851200103759766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,16,8,128,1,float16,float16,0,0.5022613207499186
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,fp8,0,0.45158398151397705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,16,4,128,1,float16,float16,0,0.3279520074526469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,16,16,128,1,float16,float16,0,0.3659306764602661
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,16,1,128,1,float16,float16,0,0.07473066449165344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,fp8,0,0.28321067492167157
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,fp8,0,0.03302400062481562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,16,2,128,1,float16,float16,0,0.08274133503437042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,fp8,0,0.04453866680463155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,fp8,0,0.08065600196520488
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,16,1,128,1,float16,float16,0,0.04159999887148539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,fp8,0,0.16410133242607117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,16,4,128,1,float16,float16,0,0.116565336783727
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,16,8,128,1,float16,float16,0,0.18920532862345377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,fp8,0,0.07766399780909221
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,fp8,0,0.022613334159056347
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,16,16,128,1,float16,float16,0,0.07791466514269511
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,16,2,128,1,float16,float16,0,0.04550399879614512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,16,4,128,1,float16,float16,0,0.052986666560173035
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,fp8,0,0.027232001225153606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,16,8,128,1,float16,float16,0,0.05362133185068766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,16,16,128,1,float16,float16,0,0.030421334008375805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,fp8,0,0.05537599821885427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,16,1,128,1,float16,float16,0,0.02380266785621643
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,fp8,0,0.017477333545684814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,fp8,0,0.04155733436346054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,fp8,0,0.04353600243727366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,16,4,128,1,float16,float16,0,0.02903999884923299
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,fp8,0,0.026709333062171936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,16,8,128,1,float16,float16,0,0.029696000119050343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,fp8,0,0.01942933350801468
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,16,16,128,1,float16,float16,0,0.017984000345071156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,16,2,128,1,float16,float16,0,0.025477332373460133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,16,1,128,1,float16,float16,0,0.014256000518798828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,fp8,0,0.03126933425664902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,fp8,0,0.014762666076421738
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,fp8,0,0.023930666347344715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,16,2,128,1,float16,float16,0,0.015168000012636185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,16,4,128,1,float16,float16,0,0.01725333308180173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,fp8,0,0.018901333212852478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,fp8,0,0.015754666179418564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,16,8,128,1,float16,float16,0,0.01714133347074191
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,16,16,128,1,float16,float16,0,0.01108266661564509
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,fp8,0,0.020058666666348774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,16,1,128,1,float16,float16,0,0.009679999823371569
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,16,4,128,1,float16,float16,0,0.010725333044926325
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,fp8,0,0.013738666971524557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,16,2,128,1,float16,float16,0,0.009674666449427605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,fp8,0,0.014959999670584997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,fp8,0,0.013909333695967993
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,16,8,128,1,float16,float16,0,0.010901333143313726
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,fp8,0,0.016330666840076447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,16,16,128,1,float16,float16,0,0.009359999870260557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,16,1,128,1,float16,float16,0,0.00877333308259646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,fp8,0,0.015706667055686314
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,16,4,128,1,float16,float16,0,0.009125333279371262
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,16,8,128,1,float16,float16,0,0.009050666665037474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,fp8,0,0.013530666629473368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,fp8,0,0.013359999905029932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,16,2,128,1,float16,float16,0,0.008912000184257826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,16,16,128,1,float16,float16,0,0.00873066671192646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,fp8,0,0.01339200014869372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,fp8,0,0.012650666137536367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,16,1,128,1,float16,float16,0,0.008512000242869059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,16,4,128,1,float16,float16,0,0.008597333605090777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,16,2,128,1,float16,float16,0,0.0084906667470932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,16,8,128,1,float16,float16,0,0.008581333483258883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,16,16,128,1,float16,float16,0,0.008447999755541483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,16,1,128,1,float16,float16,0,0.008266666904091835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,fp8,0,0.012357333054145178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,16,2,128,1,float16,float16,0,0.008240000034372011
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,16,4,128,1,float16,float16,0,0.0084906667470932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,fp8,0,0.012666666259368261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,fp8,0,0.012560000022252401
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,16,8,128,1,float16,float16,0,0.008447999755541483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,16,1,128,1,float16,float16,0,0.125408003727595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,fp8,0,0.03818133225043615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,16,2,128,1,float16,float16,0,0.13433067003885904
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,fp8,0,0.059690664211908974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,16,4,128,1,float16,float16,0,0.16452266772588095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,16,8,128,1,float16,float16,0,0.20949333906173706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,fp8,0,0.11486400167147319
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,16,16,128,1,float16,float16,0,0.09749866525332133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,16,1,128,1,float16,float16,0,0.0664160003264745
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,fp8,0,0.02664533257484436
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,fp8,0,0.13315199812253317
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,16,2,128,1,float16,float16,0,0.07042666773001353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,fp8,0,0.032255999743938446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,16,4,128,1,float16,float16,0,0.07743466893831889
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,16,8,128,1,float16,float16,0,0.07825066645940144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,16,1,128,1,float16,float16,0,0.03721066564321518
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,fp8,0,0.021317332983016968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,fp8,0,0.2160373330116272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,fp8,0,0.08294933537642162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,fp8,0,0.07017600039641063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,16,16,128,1,float16,float16,0,0.0440586656332016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,fp8,0,0.0584853341182073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,16,2,128,1,float16,float16,0,0.03905600061019262
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,fp8,0,0.023285334308942158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,16,4,128,1,float16,float16,0,0.04251733422279358
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,16,8,128,1,float16,float16,0,0.04285866518815359
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,16,16,128,1,float16,float16,0,0.024506665766239166
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,16,1,128,1,float16,float16,0,0.020970667401949566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,fp8,0,0.01838933303952217
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,16,2,128,1,float16,float16,0,0.021562665700912476
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,fp8,0,0.04600533346335093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,fp8,0,0.03858133405447006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,fp8,0,0.01951466624935468
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,fp8,0,0.03136533250411352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,16,8,128,1,float16,float16,0,0.023706667125225067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,fp8,0,0.022863999009132385
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,16,4,128,1,float16,float16,0,0.023733332753181458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,16,16,128,1,float16,float16,0,0.014490666488806406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,fp8,0,0.02555199960867564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,fp8,0,0.021536000072956085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,16,1,128,1,float16,float16,0,0.012810666114091873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,16,2,128,1,float16,float16,0,0.01293333371480306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,fp8,0,0.01802666609485944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,16,8,128,1,float16,float16,0,0.013946666071812311
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,fp8,0,0.017514667163292568
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,16,4,128,1,float16,float16,0,0.013983999689420065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,fp8,0,0.018874666343132656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,16,16,128,1,float16,float16,0,0.009162666896979014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,fp8,0,0.019567999988794327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,16,1,128,1,float16,float16,0,0.00867733359336853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,16,4,128,1,float16,float16,0,0.00895999992887179
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,fp8,0,0.017445333302021027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,16,2,128,1,float16,float16,0,0.008709333216150602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,fp8,0,0.017594666530688603
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,16,8,128,1,float16,float16,0,0.00897066667675972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,fp8,0,0.016794666647911072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,fp8,0,0.01775466650724411
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,fp8,0,0.01695466662446658
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,16,16,128,1,float16,float16,0,0.008421333506703377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,16,1,128,1,float16,float16,0,0.008362666393319765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,16,4,128,1,float16,float16,0,0.008314666648705801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,fp8,0,0.016794666647911072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,16,8,128,1,float16,float16,0,0.008287999778985977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,16,2,128,1,float16,float16,0,0.00820266641676426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,fp8,0,0.0173333336909612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,fp8,0,0.016693333784739178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,16,16,128,1,float16,float16,0,0.008181333541870117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,16,1,128,1,float16,float16,0,0.007989333321650824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,16,2,128,1,float16,float16,0,0.00797333319981893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,fp8,0,0.016842667013406754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,16,4,128,1,float16,float16,0,0.008165333420038223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,16,8,128,1,float16,float16,0,0.008165333420038223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,fp8,0,0.01657066618402799
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,16,1,128,1,float16,float16,0,0.00797333319981893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,16,2,128,1,float16,float16,0,0.008016000191370646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,16,16,128,1,float16,float16,0,0.008197333042820295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,fp8,0,0.01661866654952367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,fp8,0,0.017231999586025875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,16,8,128,1,float16,float16,0,0.008063999935984612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,16,4,128,1,float16,float16,0,0.007946666950980822
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,fp8,0,0.016858667135238647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,fp8,0,0.02864533414443334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,16,1,128,1,float16,float16,0,0.11758933464686076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,fp8,0,0.045696000258127846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,16,4,128,1,float16,float16,0,0.129120002190272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,fp8,0,0.0812960018714269
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,16,2,128,1,float16,float16,0,0.12140267093976338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,16,8,128,1,float16,float16,0,0.12957866986592612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,16,16,128,1,float16,float16,0,0.06877866884072621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,fp8,0,0.0216799999276797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,16,2,128,1,float16,float16,0,0.0639519989490509
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,fp8,0,0.10668800274531047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,16,1,128,1,float16,float16,0,0.06223999957243601
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,16,4,128,1,float16,float16,0,0.06748799979686737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,fp8,0,0.025455998877684276
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,fp8,0,0.12018133203188579
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,16,16,128,1,float16,float16,0,0.03818666686614355
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,16,1,128,1,float16,float16,0,0.034703999757766724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,16,8,128,1,float16,float16,0,0.06817066669464111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,fp8,0,0.05991466840108236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,fp8,0,0.06671466430028279
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,fp8,0,0.04489600161711375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,16,2,128,1,float16,float16,0,0.03557866563399633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,fp8,0,0.018581333259741466
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,16,4,128,1,float16,float16,0,0.03731200098991394
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,fp8,0,0.024517332514127094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,16,16,128,1,float16,float16,0,0.02141333371400833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,16,8,128,1,float16,float16,0,0.03756800045569738
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,fp8,0,0.020143999407688778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,16,1,128,1,float16,float16,0,0.019962667177120846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,fp8,0,0.017301333447297413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,fp8,0,0.03640000025431315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,fp8,0,0.03299733251333237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,16,2,128,1,float16,float16,0,0.019823999454577763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,16,4,128,1,float16,float16,0,0.02083733429511388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,16,1,128,1,float16,float16,0,0.012133333832025528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,fp8,0,0.020576000213623047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,fp8,0,0.018677332748969395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,16,16,128,1,float16,float16,0,0.012730666746695837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,fp8,0,0.017450666675964992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,fp8,0,0.01933866615096728
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,16,8,128,1,float16,float16,0,0.021013334393501282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,16,2,128,1,float16,float16,0,0.012266666938861212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,fp8,0,0.017509333789348602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,fp8,0,0.016762666404247284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,fp8,0,0.01695466662446658
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,16,4,128,1,float16,float16,0,0.012458667159080505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,16,8,128,1,float16,float16,0,0.012549333274364471
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,fp8,0,0.0173333336909612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,16,16,128,1,float16,float16,0,0.008463999877373377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,16,1,128,1,float16,float16,0,0.008383999889095625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,16,4,128,1,float16,float16,0,0.008346666892369589
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,16,2,128,1,float16,float16,0,0.008352000266313553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,fp8,0,0.016597333053747814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,fp8,0,0.01728533332546552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,16,8,128,1,float16,float16,0,0.00842666688064734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,fp8,0,0.01684800038735072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,16,16,128,1,float16,float16,0,0.00814933329820633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,16,1,128,1,float16,float16,0,0.008016000191370646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,16,2,128,1,float16,float16,0,0.008016000191370646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,fp8,0,0.01658133293191592
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,16,4,128,1,float16,float16,0,0.008005333443482717
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,fp8,0,0.017029333859682083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,16,8,128,1,float16,float16,0,0.008074666683872541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,fp8,0,0.016437333077192307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,16,16,128,1,float16,float16,0,0.008000000069538752
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,fp8,0,0.016869333883126576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,fp8,0,0.016282666474580765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,16,1,128,1,float16,float16,0,0.007840000092983246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,16,2,128,1,float16,float16,0,0.00795199970404307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,fp8,0,0.016821333517630894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,fp8,0,0.016688000410795212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,fp8,0,0.016490666816631954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,16,4,128,1,float16,float16,0,0.007936000203092894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,fp8,0,0.016943999876578648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,16,8,128,1,float16,float16,0,0.00795199970404307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,16,16,128,1,float16,float16,0,0.008069333309928576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,16,1,128,1,float16,float16,0,0.007813333223263422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,16,2,128,1,float16,float16,0,0.007749333356817563
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,fp8,0,0.016549333930015564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,16,4,128,1,float16,float16,0,0.007957333077987036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,fp8,0,0.016341333587964375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,fp8,0,0.01687466725707054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,16,8,128,1,float16,float16,0,0.008031999692320824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,fp8,0,0.01685333376129468
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,16,1,128,1,float16,float16,0,0.013338666409254074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,16,2,128,1,float16,float16,0,0.025360000630219776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,0,0.015557333827018738
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,0,0.021695998807748158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,16,4,128,1,float16,float16,0,0.039450667798519135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,16,8,128,1,float16,float16,0,0.06398400167624156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,0,0.03344533344109853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,16,16,128,1,float16,float16,0,0.056645333766937256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,16,1,128,1,float16,float16,0,0.009599999835093817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,fp8,0,0.05162133276462555
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,0,0.014671999961137772
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,16,2,128,1,float16,float16,0,0.017258666455745697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,0,0.052704001466433205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,16,4,128,1,float16,float16,0,0.024256000916163128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,16,16,128,1,float16,float16,0,0.03222399950027466
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,fp8,0,0.03181866556406021
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,0,0.02075733368595441
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,16,8,128,1,float16,float16,0,0.03815466662247976
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,16,1,128,1,float16,float16,0,0.009375999992092451
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,0,0.03238933285077413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,0,0.011930666863918304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,16,2,128,1,float16,float16,0,0.012869333227475485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,0,0.012261333564917246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,16,16,128,1,float16,float16,0,0.018661333868900936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,16,4,128,1,float16,float16,0,0.016549333930015564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,16,8,128,1,float16,float16,0,0.02383466561635335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,16,1,128,1,float16,float16,0,0.008901333436369896
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,0,0.02038399999340375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,fp8,0,0.019871999820073444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,0,0.014314666390419006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,0,0.01146666705608368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,16,2,128,1,float16,float16,0,0.012703999876976013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,0,0.011605333536863327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,16,4,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,0,0.01163200040658315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,16,16,128,1,float16,float16,0,0.011653333902359009
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,16,1,128,1,float16,float16,0,0.008832000195980072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,0,0.013765333841244379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,fp8,0,0.013925333817799887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,16,8,128,1,float16,float16,0,0.016421332955360413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,0,0.011306667079528173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,16,2,128,1,float16,float16,0,0.012495999534924826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,0,0.01139733319481214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,16,8,128,1,float16,float16,0,0.012794667234023413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,16,1,128,1,float16,float16,0,0.008805333326260248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,16,4,128,1,float16,float16,0,0.012624000509579977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,0,0.013338666409254074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,fp8,0,0.013749333719412485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,16,2,128,1,float16,float16,0,0.01219733307758967
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,16,16,128,1,float16,float16,0,0.007967999825874964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,16,4,128,1,float16,float16,0,0.012250666817029318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,0,0.01108266661564509
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,0,0.011365332951148352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,16,8,128,1,float16,float16,0,0.012383999923865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,16,16,128,1,float16,float16,0,0.007802666475375493
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,0,0.013194666554530462
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,16,1,128,1,float16,float16,0,0.008736000085870424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,16,2,128,1,float16,float16,0,0.012074666718641916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,16,4,128,1,float16,float16,0,0.012181332955757776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,16,8,128,1,float16,float16,0,0.012186666329701742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,16,16,128,1,float16,float16,0,0.007818666597207388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,16,1,128,1,float16,float16,0,0.008559999987483025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,16,2,128,1,float16,float16,0,0.011999999483426413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,0,0.010757333288590113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,0,0.010842667271693548
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,16,8,128,1,float16,float16,0,0.012074666718641916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,16,4,128,1,float16,float16,0,0.01191466674208641
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,16,16,128,1,float16,float16,0,0.007658666620651881
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,16,1,128,1,float16,float16,0,0.008458666503429413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,0,0.010549332946538925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,0,0.010741333166758219
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,16,4,128,1,float16,float16,0,0.008538666491707167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,fp8,0,0.011594666788975397
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,16,8,128,1,float16,float16,0,0.008672000219424566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,16,2,128,1,float16,float16,0,0.008602666358153025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,fp8,0,2.55459197362264
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,fp8,0,5.481717427571614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,fp8,0,1.3265759944915771
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,fp8,0,20.142655690511067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,12,12,128,1,float16,float16,0,20.75162633260091
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,fp8,0,2.9390080769856772
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,12,1,128,1,float16,float16,0,19.46384048461914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,fp8,0,10.456229527791342
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,12,2,128,1,float16,float16,0,19.308223724365234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,12,12,128,1,float16,float16,0,10.101045608520508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,fp8,0,0.7066400051116943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,12,1,128,1,float16,float16,0,39.056864420572914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,12,2,128,1,float16,float16,0,39.99824015299479
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,12,4,128,1,float16,float16,0,40.744181315104164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,fp8,0,1.6312692960103352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,12,4,128,1,float16,float16,0,20.755786895751953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,12,1,128,1,float16,float16,0,9.256314595540365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,fp8,0,42.89947509765625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,12,12,128,1,float16,float16,0,4.513781229654948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,fp8,0,4.975791931152344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,fp8,0,0.31785066922505695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,12,2,128,1,float16,float16,0,9.845034917195639
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,fp8,0,0.8915253480275472
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,12,4,128,1,float16,float16,0,9.866437276204428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,12,1,128,1,float16,float16,0,4.459418614705403
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,fp8,0,2.73688538869222
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,12,2,128,1,float16,float16,0,4.588794708251953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,12,4,128,1,float16,float16,0,4.391269365946452
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,fp8,0,1.745898723602295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,fp8,0,10.801712036132812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,fp8,0,3.4448960622151694
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,fp8,0,21.166831970214844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,fp8,0,0.9487626552581787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,fp8,0,12.123274485270182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,12,12,128,1,float16,float16,0,11.93679936726888
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,12,1,128,1,float16,float16,0,11.21954091389974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,fp8,0,1.845962683359782
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,12,2,128,1,float16,float16,0,11.442848205566406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,12,1,128,1,float16,float16,0,23.15331268310547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,fp8,0,5.7769120534261065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,12,12,128,1,float16,float16,0,5.625872294108073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,12,2,128,1,float16,float16,0,22.678538004557293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,fp8,0,0.5069119930267334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,12,4,128,1,float16,float16,0,23.77599589029948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,fp8,0,1.103375991185506
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,12,4,128,1,float16,float16,0,12.322901407877604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,12,1,128,1,float16,float16,0,5.825386683146159
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,fp8,0,3.0201492309570312
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,12,12,128,1,float16,float16,0,2.4508320490519204
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,12,2,128,1,float16,float16,0,5.719104131062825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,fp8,0,26.337178548177082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,fp8,0,0.1951786677042643
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,12,4,128,1,float16,float16,0,5.593317031860352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,fp8,0,0.5945706764856974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,12,1,128,1,float16,float16,0,2.7127307256062827
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,fp8,0,1.5196000734965007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,12,2,128,1,float16,float16,0,2.287775993347168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,fp8,0,1.3975839614868164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,12,4,128,1,float16,float16,0,2.5719146728515625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,fp8,0,12.789162953694662
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,fp8,0,6.20199457804362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,fp8,0,2.5919787089029946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,fp8,0,0.673413356145223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,12,1,128,1,float16,float16,0,7.520005544026692
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,fp8,0,7.967039744059245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,12,12,128,1,float16,float16,0,8.556042353312174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,fp8,0,1.490437348683675
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,12,2,128,1,float16,float16,0,7.664384206136067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,12,1,128,1,float16,float16,0,15.701674143473307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,12,2,128,1,float16,float16,0,16.63370641072591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,fp8,0,3.9887574513753257
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,12,12,128,1,float16,float16,0,3.8093865712483725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,12,4,128,1,float16,float16,0,15.681045532226562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,fp8,0,0.2707786758740743
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,12,4,128,1,float16,float16,0,8.05949846903483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,fp8,0,0.8659733136494955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,fp8,0,17.86642074584961
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,12,1,128,1,float16,float16,0,3.508746782938639
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,12,2,128,1,float16,float16,0,3.4929653803507485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,12,4,128,1,float16,float16,0,3.373274803161621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,fp8,0,0.17045332988103232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,12,1,128,1,float16,float16,0,1.8690133094787598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,12,12,128,1,float16,float16,0,1.8770400683085124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,fp8,0,8.108767827351889
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,fp8,0,2.2585973739624023
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,fp8,0,0.48740267753601074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,12,2,128,1,float16,float16,0,1.7726880709330242
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,12,4,128,1,float16,float16,0,1.6714612642923992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,fp8,0,1.089402675628662
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,fp8,0,4.169925371805827
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,fp8,0,2.1031840642293296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,fp8,0,3.625589370727539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,fp8,0,0.9970613320668539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,fp8,0,10.720784505208334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,12,12,128,1,float16,float16,0,11.223594665527344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,12,1,128,1,float16,float16,0,10.268794377644857
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,fp8,0,2.1017227172851562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,12,2,128,1,float16,float16,0,10.604746500651041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,12,2,128,1,float16,float16,0,20.775482177734375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,12,1,128,1,float16,float16,0,21.522532145182293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,12,12,128,1,float16,float16,0,4.957920074462891
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,fp8,0,5.214810689290364
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,fp8,0,0.5851360162099203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,12,4,128,1,float16,float16,0,22.0428949991862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,fp8,0,1.0202346642812092
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,12,4,128,1,float16,float16,0,10.747365315755209
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,fp8,0,23.021947224934895
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,12,1,128,1,float16,float16,0,4.722832043965657
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,fp8,0,2.7746880849202475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,fp8,0,0.21442667643229166
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,12,2,128,1,float16,float16,0,4.507120132446289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,12,12,128,1,float16,float16,0,2.283221403757731
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,12,4,128,1,float16,float16,0,4.788768132527669
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,fp8,0,0.4537706772486369
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,12,1,128,1,float16,float16,0,2.226410706837972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,12,2,128,1,float16,float16,0,2.1346774101257324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,12,12,128,1,float16,float16,0,1.386298656463623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,fp8,0,1.3673386573791504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,fp8,0,10.866527557373047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,fp8,0,0.1184213360150655
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,12,4,128,1,float16,float16,0,2.3013173739115396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,fp8,0,0.28540800015131634
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,12,1,128,1,float16,float16,0,1.1607360045115154
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,12,2,128,1,float16,float16,0,1.0725706418355305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,fp8,0,5.187397321065267
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,fp8,0,0.7560479640960693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,12,4,128,1,float16,float16,0,1.3610186576843262
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,fp8,0,2.4926719665527344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,fp8,0,1.4016480445861816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,fp8,0,2.4094133377075195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,fp8,0,0.686352014541626
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,12,12,128,1,float16,float16,0,5.896928151448567
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,12,1,128,1,float16,float16,0,4.968832015991211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,fp8,0,6.287008285522461
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,fp8,0,1.242517312367757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,12,1,128,1,float16,float16,0,11.20623524983724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,12,2,128,1,float16,float16,0,5.220005353291829
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,fp8,0,3.123728116353353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,12,2,128,1,float16,float16,0,12.137808481852213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,12,4,128,1,float16,float16,0,12.537012736002604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,fp8,0,0.33636800448099774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,12,12,128,1,float16,float16,0,2.7707627614339194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,12,4,128,1,float16,float16,0,5.6786238352457685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,fp8,0,13.194352467854818
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,fp8,0,0.7175467014312744
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,12,1,128,1,float16,float16,0,2.9139254887898765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,fp8,0,0.14222400387128195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,fp8,0,1.7158080736796062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,12,4,128,1,float16,float16,0,2.474661350250244
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,12,12,128,1,float16,float16,0,1.5324692726135254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,12,1,128,1,float16,float16,0,1.2332533200581868
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,12,2,128,1,float16,float16,0,2.3828320503234863
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,fp8,0,0.3205813368161519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,12,2,128,1,float16,float16,0,1.458074728647868
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,12,1,128,1,float16,float16,0,0.6350880066553751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,fp8,0,6.159701029459636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,12,4,128,1,float16,float16,0,1.2815039952596028
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,fp8,0,0.8899412949879965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,12,12,128,1,float16,float16,0,0.7184426784515381
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,fp8,0,3.030618667602539
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,fp8,0,0.09983999530474345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,12,2,128,1,float16,float16,0,0.6323946714401245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,fp8,0,0.21644800901412964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,12,4,128,1,float16,float16,0,0.6583733161290487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,fp8,0,0.5401279926300049
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,fp8,0,1.5893707275390625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,fp8,0,1.741653283437093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,fp8,0,2.7394186655680337
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,12,12,128,1,float16,float16,0,5.497946421305339
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,fp8,0,6.060127894083659
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,12,1,128,1,float16,float16,0,4.682469367980957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,fp8,0,0.9206293423970541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,12,2,128,1,float16,float16,0,11.580059051513672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,12,1,128,1,float16,float16,0,10.66148821512858
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,12,4,128,1,float16,float16,0,11.267183939615885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,fp8,0,1.3951733907063801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,12,2,128,1,float16,float16,0,4.950181325276692
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,fp8,0,0.4468746582667033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,fp8,0,3.2456159591674805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,12,12,128,1,float16,float16,0,2.873114585876465
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,12,1,128,1,float16,float16,0,2.093536059061686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,fp8,0,12.025936126708984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,fp8,0,0.6786186695098877
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,12,4,128,1,float16,float16,0,4.568570772806804
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,12,2,128,1,float16,float16,0,2.162538687388102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,fp8,0,1.5191574096679688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,12,1,128,1,float16,float16,0,1.0971733729044597
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,fp8,0,0.11590933799743652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,12,4,128,1,float16,float16,0,2.3163572947184243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,fp8,0,5.58685302734375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,fp8,0,0.2603360017140706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,12,12,128,1,float16,float16,0,1.4486667315165203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,12,2,128,1,float16,float16,0,1.2751253445943196
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,fp8,0,2.7450507481892905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,12,4,128,1,float16,float16,0,1.1969866752624512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,12,1,128,1,float16,float16,0,0.5651040077209473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,12,12,128,1,float16,float16,0,0.7613013585408529
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,fp8,0,0.7992213567097982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,12,2,128,1,float16,float16,0,0.6234986782073975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,fp8,0,0.181658665339152
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,12,4,128,1,float16,float16,0,0.5998080174128214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,fp8,0,0.44686933358510333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,fp8,0,1.469173272450765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,12,12,128,1,float16,float16,0,0.37297598520914715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,fp8,0,0.06725866595904033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,12,1,128,1,float16,float16,0,0.3312000036239624
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,fp8,0,0.060266668597857155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,12,2,128,1,float16,float16,0,0.32756266991297406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,fp8,0,0.09687466422716777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,12,4,128,1,float16,float16,0,0.3386666774749756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,fp8,0,0.31621867418289185
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,fp8,0,0.7525066534678141
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,fp8,0,1.8379146258036296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,fp8,0,1.2512319882710774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,fp8,0,0.5919946829477946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,12,12,128,1,float16,float16,0,3.4150613149007163
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,12,2,128,1,float16,float16,0,5.457861582438151
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,12,1,128,1,float16,float16,0,2.405381361643473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,fp8,0,3.884362538655599
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,12,4,128,1,float16,float16,0,5.61732800801595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,12,1,128,1,float16,float16,0,5.340053558349609
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,fp8,0,0.9461973508199056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,12,2,128,1,float16,float16,0,2.5707200368245444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,fp8,0,6.978122711181641
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,fp8,0,0.25526400407155353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,12,12,128,1,float16,float16,0,1.7072319984436035
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,12,1,128,1,float16,float16,0,1.484389305114746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,fp8,0,1.8968267440795898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,12,4,128,1,float16,float16,0,3.0237439473470054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,12,2,128,1,float16,float16,0,1.2739360332489014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,fp8,0,0.47699201107025146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,fp8,0,3.4851147333780923
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,fp8,0,0.08390399813652039
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,fp8,0,0.9544053077697754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,12,12,128,1,float16,float16,0,0.8481173515319824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,12,1,128,1,float16,float16,0,0.6428159872690836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,fp8,0,0.1973386605580648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,12,4,128,1,float16,float16,0,1.3697013854980469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,12,2,128,1,float16,float16,0,0.6823039849599203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,fp8,0,1.541386604309082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,12,4,128,1,float16,float16,0,0.7053066889444987
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,fp8,0,0.4633653163909912
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,12,12,128,1,float16,float16,0,0.4405653476715088
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,fp8,0,0.05483733117580414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,12,1,128,1,float16,float16,0,0.3633120059967041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,12,2,128,1,float16,float16,0,0.35014931360880536
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,fp8,0,0.12089066704114278
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,12,4,128,1,float16,float16,0,0.3632853428522746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,fp8,0,0.7779839833577474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,fp8,0,0.27933333317438763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,12,1,128,1,float16,float16,0,0.2135039965311686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,fp8,0,0.042506664991378784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,12,2,128,1,float16,float16,0,0.20728000005086264
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,12,12,128,1,float16,float16,0,0.21987199783325195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,fp8,0,0.07338133454322815
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,12,4,128,1,float16,float16,0,0.21465067068735758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,fp8,0,0.20329066117604574
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,fp8,0,0.5077173312505087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,fp8,0,1.601103941599528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,12,12,128,1,float16,float16,0,3.5611305236816406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,fp8,0,4.223504066467285
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,fp8,0,2.2122079531351724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,12,4,128,1,float16,float16,0,6.117861429850261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,12,1,128,1,float16,float16,0,5.598927815755208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,fp8,0,0.8254240353902181
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,12,1,128,1,float16,float16,0,2.416917324066162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,12,2,128,1,float16,float16,0,5.1560319264729815
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,fp8,0,6.711813608805339
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,fp8,0,1.0896106561024983
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,12,2,128,1,float16,float16,0,2.438197294871012
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,12,12,128,1,float16,float16,0,1.7316160202026367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,12,4,128,1,float16,float16,0,2.825786590576172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,fp8,0,0.3756320079167684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,12,1,128,1,float16,float16,0,1.214570681254069
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,fp8,0,2.1454346974690757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,12,4,128,1,float16,float16,0,1.4278720219930012
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,fp8,0,0.5289386510848999
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,12,2,128,1,float16,float16,0,1.2235466639200847
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,fp8,0,1.0261546770731609
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,fp8,0,3.0281333923339844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,12,12,128,1,float16,float16,0,0.9047040144602457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,12,1,128,1,float16,float16,0,0.644869327545166
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,fp8,0,0.16974933942159018
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,12,2,128,1,float16,float16,0,0.6253226598103842
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,fp8,0,0.07365866502126057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,12,4,128,1,float16,float16,0,0.6936000188191732
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,12,12,128,1,float16,float16,0,0.45212801297505695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,fp8,0,1.5904906590779622
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,fp8,0,0.4899413188298543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,fp8,0,0.04654933512210846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,12,1,128,1,float16,float16,0,0.3138773242632548
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,12,2,128,1,float16,float16,0,0.3271733323733012
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,12,4,128,1,float16,float16,0,0.34825066725413006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,fp8,0,0.7994933128356934
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,fp8,0,0.2600319981575012
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,fp8,0,0.10616532961527507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,12,12,128,1,float16,float16,0,0.19500267505645752
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,12,1,128,1,float16,float16,0,0.179365336894989
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,fp8,0,0.04020266731580099
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,12,2,128,1,float16,float16,0,0.18487467368443808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,fp8,0,0.05891199906667074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,fp8,0,0.3972853422164917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,12,12,128,1,float16,float16,0,0.11505066355069478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,12,4,128,1,float16,float16,0,0.19954667488733926
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,fp8,0,0.14784000317255655
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,fp8,0,0.03314133236805598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,fp8,0,0.2737706700960795
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,fp8,0,0.052426666021347046
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,12,1,128,1,float16,float16,0,0.10685333609580994
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,fp8,0,0.09613333145777385
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,12,2,128,1,float16,float16,0,0.10742933551470439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,12,4,128,1,float16,float16,0,0.1160426636536916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,fp8,0,1.1761706670125325
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,12,12,128,1,float16,float16,0,2.3347466786702475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,12,1,128,1,float16,float16,0,3.067824045817057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,fp8,0,2.8101441065470376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,12,4,128,1,float16,float16,0,3.401551882425944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,12,2,128,1,float16,float16,0,3.1419893900553384
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,fp8,0,1.5652319590250652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,12,1,128,1,float16,float16,0,1.3761812845865886
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,fp8,0,0.5598880052566528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,fp8,0,0.7569119930267334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,12,2,128,1,float16,float16,0,1.4862613677978516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,fp8,0,3.9166507720947266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,12,1,128,1,float16,float16,0,0.733903964360555
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,12,4,128,1,float16,float16,0,1.6383679707845051
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,fp8,0,0.21625065803527832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,fp8,0,1.4386293093363445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,12,12,128,1,float16,float16,0,1.1592480341593425
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,fp8,0,0.3660106658935547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,fp8,0,1.971962610880534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,12,2,128,1,float16,float16,0,0.7648586432139078
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,12,12,128,1,float16,float16,0,0.5624959866205851
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,fp8,0,0.63591468334198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,12,4,128,1,float16,float16,0,0.8987840016682943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,12,4,128,1,float16,float16,0,0.4359573523203532
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,fp8,0,0.30979732672373456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,12,2,128,1,float16,float16,0,0.39684800306955975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,12,1,128,1,float16,float16,0,0.3672586679458618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,fp8,0,0.11285866300264995
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,fp8,0,0.058703998724619545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,12,1,128,1,float16,float16,0,0.19578667481740317
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,12,12,128,1,float16,float16,0,0.2829759915669759
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,fp8,0,0.9927946726481119
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,12,2,128,1,float16,float16,0,0.2050559918085734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,fp8,0,0.07274666428565979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,fp8,0,0.432645320892334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,fp8,0,0.035360001027584076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,12,12,128,1,float16,float16,0,0.12285866340001424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,fp8,0,0.16459733247756958
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,12,4,128,1,float16,float16,0,0.21939200162887573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,fp8,0,0.02938133229811986
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,12,2,128,1,float16,float16,0,0.11462400356928508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,fp8,0,0.04595733185609182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,12,1,128,1,float16,float16,0,0.11168533563613892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,fp8,0,0.11546666423479716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,12,4,128,1,float16,float16,0,0.12227200468381245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,12,12,128,1,float16,float16,0,0.08356266220410664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,12,1,128,1,float16,float16,0,0.0774239997069041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,fp8,0,0.040693332751592
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,fp8,0,0.02533866713444392
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,fp8,0,0.2717653314272563
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,12,4,128,1,float16,float16,0,0.08302933474381764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,fp8,0,0.15366400281588236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,12,2,128,1,float16,float16,0,0.0782239983479182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,fp8,0,0.07225066423416138
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,12,12,128,1,float16,float16,0,2.5220905939737954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,fp8,0,1.5935947100321453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,fp8,0,1.9946133295694988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,12,1,128,1,float16,float16,0,2.902581214904785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,12,2,128,1,float16,float16,0,2.958101272583008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,12,1,128,1,float16,float16,0,1.3547147115071614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,fp8,0,4.053930600484212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,12,4,128,1,float16,float16,0,3.451829274495443
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,fp8,0,3.174858729044596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,fp8,0,0.7755413055419922
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,12,2,128,1,float16,float16,0,1.4712212880452473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,fp8,0,0.9671573638916016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,12,4,128,1,float16,float16,0,1.7157066663106282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,12,1,128,1,float16,float16,0,0.7113760312398275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,fp8,0,0.349727988243103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,12,2,128,1,float16,float16,0,0.7705279986063639
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,fp8,0,1.6653067270914714
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,fp8,0,0.43298133214314777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,12,12,128,1,float16,float16,0,0.6138079961140951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,fp8,0,0.7574559847513834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,fp8,0,0.06112533311049143
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,fp8,0,0.9917333126068115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,12,4,128,1,float16,float16,0,0.8743626276652018
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,12,2,128,1,float16,float16,0,0.3950666586558024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,fp8,0,0.1241919994354248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,12,4,128,1,float16,float16,0,0.4517813523610433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,fp8,0,0.34406399726867676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,12,1,128,1,float16,float16,0,0.17996267477671304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,12,12,128,1,float16,float16,0,1.255834658940633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,fp8,0,0.035973332822322845
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,fp8,0,0.4912213484446208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,12,2,128,1,float16,float16,0,0.18965333700180054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,fp8,0,0.0643146683772405
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,12,12,128,1,float16,float16,0,0.11496532956759135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,12,1,128,1,float16,float16,0,0.3807733456293742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,12,12,128,1,float16,float16,0,0.31198400259017944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,fp8,0,0.1669173240661621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,12,1,128,1,float16,float16,0,0.10100799798965454
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,12,4,128,1,float16,float16,0,0.2221119999885559
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,fp8,0,0.027727998793125153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,12,2,128,1,float16,float16,0,0.10547199845314026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,fp8,0,2.0872586568196616
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,12,4,128,1,float16,float16,0,0.11359999577204387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,fp8,0,0.04181866844495138
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,fp8,0,0.23448532819747925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,12,1,128,1,float16,float16,0,0.05853333572546641
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,fp8,0,0.09903466701507568
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,12,2,128,1,float16,float16,0,0.06347733239332835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,fp8,0,0.15550399820009866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,fp8,0,0.034741332133611046
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,fp8,0,0.022944000860055287
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,12,12,128,1,float16,float16,0,0.07248533268769582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,12,12,128,1,float16,float16,0,0.039706667264302574
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,fp8,0,0.05906666815280914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,12,4,128,1,float16,float16,0,0.07444266478220622
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,fp8,0,0.09437333544095357
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,12,1,128,1,float16,float16,0,0.03658666710058848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,fp8,0,0.04341333111127218
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,12,4,128,1,float16,float16,0,0.03928533444801966
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,fp8,0,0.05468800167242686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,12,2,128,1,float16,float16,0,0.0373333344856898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,fp8,0,0.0653706689675649
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,12,12,128,1,float16,float16,0,2.4133013089497886
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,fp8,0,3.142143885294596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,fp8,0,1.565658728281657
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,12,1,128,1,float16,float16,0,2.445727984110514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,fp8,0,1.9669547080993652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,fp8,0,0.8494880199432373
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,12,1,128,1,float16,float16,0,0.8839200337727865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,12,2,128,1,float16,float16,0,1.0263306299845378
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,12,2,128,1,float16,float16,0,2.630943934122721
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,fp8,0,0.9610453446706136
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,12,4,128,1,float16,float16,0,3.1392107009887695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,fp8,0,2.8804801305135093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,12,4,128,1,float16,float16,0,1.3309226830800374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,fp8,0,0.3417919874191284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,12,12,128,1,float16,float16,0,1.2125653425852458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,fp8,0,1.5192160606384277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,12,12,128,1,float16,float16,0,0.5976853370666504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,12,4,128,1,float16,float16,0,0.6633706490198771
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,12,1,128,1,float16,float16,0,0.4453546603520711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,fp8,0,1.4051733016967773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,fp8,0,0.6891146500905355
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,12,1,128,1,float16,float16,0,0.23823465903600058
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,fp8,0,0.6315146684646606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,12,2,128,1,float16,float16,0,0.26339733600616455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,fp8,0,0.40573867162068683
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,fp8,0,0.05305066704750061
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,12,2,128,1,float16,float16,0,0.5194026629130045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,12,4,128,1,float16,float16,0,0.3254186709721883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,fp8,0,0.26331732670466107
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,fp8,0,0.03246400008598963
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,12,1,128,1,float16,float16,0,0.11151466766993205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,fp8,0,0.10103467106819153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,12,12,128,1,float16,float16,0,0.24291199445724487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,fp8,0,0.3457706769307454
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,12,2,128,1,float16,float16,0,0.12129599849383037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,fp8,0,0.047775998711586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,12,4,128,1,float16,float16,0,0.14214932918548584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,12,12,128,1,float16,float16,0,0.07620266576608022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,fp8,0,0.12379733721415202
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,12,1,128,1,float16,float16,0,0.06481066842873891
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,12,2,128,1,float16,float16,0,0.07287466526031494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,fp8,0,0.15471999843915304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,fp8,0,0.02447466552257538
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,fp8,0,0.03201066702604294
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,12,4,128,1,float16,float16,0,0.08037866652011871
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,fp8,0,0.06272533535957336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,12,1,128,1,float16,float16,0,0.036277333895365395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,12,12,128,1,float16,float16,0,0.042730664213498436
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,fp8,0,0.10193066795667012
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,12,4,128,1,float16,float16,0,0.04201066493988037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,12,2,128,1,float16,float16,0,0.037658666570981346
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,fp8,0,0.020096000283956528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,fp8,0,0.02518400053183238
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,12,12,128,1,float16,float16,0,0.02757333219051361
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,fp8,0,0.042319998145103455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,12,1,128,1,float16,float16,0,0.02420266717672348
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,fp8,0,0.03991466760635376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,fp8,0,0.07046400010585785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,12,2,128,1,float16,float16,0,0.025216000775496166
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,fp8,0,0.04507733384768168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,12,1,128,1,float16,float16,0,0.016938666502634685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,fp8,0,0.051514665285746254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,12,4,128,1,float16,float16,0,0.027072000006834667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,fp8,0,0.029002666473388672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,fp8,0,0.04026666780312856
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,12,2,128,1,float16,float16,0,0.01704000060757001
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,fp8,0,0.029818666477998097
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,12,12,128,1,float16,float16,0,0.018474667022625606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,12,4,128,1,float16,float16,0,0.018112000077962875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,fp8,0,0.031685332457224526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,fp8,0,0.9677973588307699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,12,1,128,1,float16,float16,0,0.7253226439158121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,fp8,0,0.7846079667409261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,12,2,128,1,float16,float16,0,0.9667680263519287
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,12,4,128,1,float16,float16,0,1.300101359685262
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,12,12,128,1,float16,float16,0,1.2203306357065837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,fp8,0,1.4282506306966145
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,12,1,128,1,float16,float16,0,0.33631467819213867
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,fp8,0,0.3447360197703044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,12,2,128,1,float16,float16,0,0.4229280153910319
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,fp8,0,1.2199680010477703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,fp8,0,0.4119199911753337
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,fp8,0,0.0505973349014918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,12,1,128,1,float16,float16,0,0.17454934120178223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,fp8,0,0.5401386817296346
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,12,12,128,1,float16,float16,0,0.5912426710128784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,fp8,0,0.10875200231870015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,12,4,128,1,float16,float16,0,0.5978453159332275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,fp8,0,0.23648534218470255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,12,4,128,1,float16,float16,0,0.2704319953918457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,12,12,128,1,float16,float16,0,0.22054932514826456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,12,1,128,1,float16,float16,0,0.07962666451931
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,fp8,0,0.6149919827779134
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,12,2,128,1,float16,float16,0,0.08861333131790161
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,fp8,0,0.25514666239420575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,fp8,0,0.04410133262475332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,fp8,0,0.09707732995351155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,fp8,0,0.02994133283694585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,fp8,0,0.021136000752449036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,12,2,128,1,float16,float16,0,0.04744000236193339
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,12,4,128,1,float16,float16,0,0.10623466968536377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,fp8,0,0.10868799686431885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,12,4,128,1,float16,float16,0,0.055061335364977516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,fp8,0,0.02834133307139079
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,fp8,0,0.05035200218359629
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,fp8,0,0.016783999900023144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,12,2,128,1,float16,float16,0,0.1990399956703186
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,12,12,128,1,float16,float16,0,0.05736533304055532
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,12,1,128,1,float16,float16,0,0.026005332668622334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,12,12,128,1,float16,float16,0,0.03258133431275686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,12,2,128,1,float16,float16,0,0.027845333019892376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,12,1,128,1,float16,float16,0,0.043680002291997276
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,12,12,128,1,float16,float16,0,0.02053333322207133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,fp8,0,0.021749332547187805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,12,1,128,1,float16,float16,0,0.01722666621208191
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,fp8,0,0.06664533416430156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,fp8,0,0.04167999823888143
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,fp8,0,0.03717333326737086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,12,2,128,1,float16,float16,0,0.0180479995906353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,12,4,128,1,float16,float16,0,0.03183999905983607
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,12,4,128,1,float16,float16,0,0.0200853335360686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,fp8,0,0.06047466893990835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,12,12,128,1,float16,float16,0,0.013455999394257864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,12,1,128,1,float16,float16,0,0.011952000359694162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,12,2,128,1,float16,float16,0,0.012106666962305704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,fp8,0,0.04491200049718221
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,fp8,0,0.03180799881617228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,fp8,0,0.02624533325433731
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,fp8,0,0.03681600093841553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,fp8,0,0.029279999434947968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,fp8,0,0.031141333281993866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,12,12,128,1,float16,float16,0,0.012074666718641916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,fp8,0,0.020608000457286835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,fp8,0,0.023706667125225067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,12,4,128,1,float16,float16,0,0.013173333058754602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,12,1,128,1,float16,float16,0,0.011434666812419891
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,12,2,128,1,float16,float16,0,0.011519999553759893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,fp8,0,0.024362665911515553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,12,4,128,1,float16,float16,0,0.011610666910807291
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,fp8,0,0.02367466688156128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,12,1,128,1,float16,float16,0,0.3354399998982747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,fp8,0,0.3498293161392212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,12,12,128,1,float16,float16,0,0.5933599869410197
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,12,2,128,1,float16,float16,0,0.45132799943288165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,fp8,0,0.6185493469238281
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,fp8,0,0.41173334916432697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,12,4,128,1,float16,float16,0,0.6406773328781128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,12,1,128,1,float16,float16,0,0.1604639987150828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,fp8,0,0.5248213211695353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,12,2,128,1,float16,float16,0,0.18266665935516357
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,12,12,128,1,float16,float16,0,0.21921066443125406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,12,4,128,1,float16,float16,0,0.26847465833028156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,fp8,0,0.08565333485603333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,12,1,128,1,float16,float16,0,0.06345599889755249
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,fp8,0,0.05113600194454193
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,fp8,0,0.23056000471115112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,fp8,0,0.2103466590245565
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,12,2,128,1,float16,float16,0,0.07122133175532024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,fp8,0,0.029904000461101532
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,12,12,128,1,float16,float16,0,0.04854933420817057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,12,4,128,1,float16,float16,0,0.08720533053080241
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,fp8,0,0.08583999673525493
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,fp8,0,0.02109866589307785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,fp8,0,0.0414986660083135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,12,2,128,1,float16,float16,0,0.039962666730086006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,12,4,128,1,float16,float16,0,0.04716266691684723
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,fp8,0,0.08505066235860188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,12,12,128,1,float16,float16,0,0.02777066578467687
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,fp8,0,0.046896000703175865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,fp8,0,0.026122666895389557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,fp8,0,0.016719999412695568
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,fp8,0,0.04488533238569895
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,12,1,128,1,float16,float16,0,0.03566399961709976
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,12,4,128,1,float16,float16,0,0.027162666122118633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,12,1,128,1,float16,float16,0,0.021402666966120403
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,12,12,128,1,float16,float16,0,0.017610666652520496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,fp8,0,0.028586665789286297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,fp8,0,0.01859733338157336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,12,1,128,1,float16,float16,0,0.014522666732470194
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,fp8,0,0.028858666618665058
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,12,2,128,1,float16,float16,0,0.023039999107519787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,fp8,0,0.014432000617186228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,12,4,128,1,float16,float16,0,0.017466666797796886
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,12,2,128,1,float16,float16,0,0.015306666493415833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,fp8,0,0.015290666371583939
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,fp8,0,0.02178666740655899
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,12,1,128,1,float16,float16,0,0.00961599995692571
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,12,12,128,1,float16,float16,0,0.01108266661564509
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,fp8,0,0.013594667116800943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,fp8,0,0.019365333020687103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,fp8,0,0.013647999614477158
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,12,2,128,1,float16,float16,0,0.009610666582981745
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,12,12,128,1,float16,float16,0,0.009450666606426239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,12,4,128,1,float16,float16,0,0.010522666076819101
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,fp8,0,0.018229333062966663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,12,1,128,1,float16,float16,0,0.008922666932145754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,12,4,128,1,float16,float16,0,0.00926399976015091
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,12,2,128,1,float16,float16,0,0.008943999807039896
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,fp8,0,0.016890666137139004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,fp8,0,0.016735999534527462
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,12,12,128,1,float16,float16,0,0.008757333581646284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,12,1,128,1,float16,float16,0,0.008890666688481966
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,fp8,0,0.01618133361140887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,12,2,128,1,float16,float16,0,0.008650666723648706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,12,4,128,1,float16,float16,0,0.008757333581646284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,fp8,0,0.01618133361140887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,12,12,128,1,float16,float16,0,0.22233599424362183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,fp8,0,0.18080532550811768
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,fp8,0,0.05152533451716105
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,12,2,128,1,float16,float16,0,0.18158400058746338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,12,1,128,1,float16,float16,0,0.1546933352947235
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,fp8,0,0.08516266942024231
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,12,4,128,1,float16,float16,0,0.26765867074330646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,fp8,0,0.2328959902127584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,12,12,128,1,float16,float16,0,0.04634666442871094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,12,2,128,1,float16,float16,0,0.06692266464233398
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,12,4,128,1,float16,float16,0,0.08226666847864787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,fp8,0,0.04080000023047129
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,fp8,0,0.07762133578459422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,fp8,0,0.06181866427262624
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,12,1,128,1,float16,float16,0,0.059119999408721924
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,fp8,0,0.030495998760064442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,fp8,0,0.021216000119845074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,12,2,128,1,float16,float16,0,0.03743999948104223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,12,1,128,1,float16,float16,0,0.03359466542800268
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,fp8,0,0.025834667185942333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,12,12,128,1,float16,float16,0,0.026015999416510265
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,fp8,0,0.016751999656359356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,fp8,0,0.042319998145103455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,12,4,128,1,float16,float16,0,0.04477333525816599
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,fp8,0,0.018565333137909572
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,12,1,128,1,float16,float16,0,0.01950399950146675
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,fp8,0,0.0371573343873024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,12,2,128,1,float16,float16,0,0.02128000060717265
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,12,4,128,1,float16,float16,0,0.025360000630219776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,12,1,128,1,float16,float16,0,0.013829333086808523
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,fp8,0,0.0258240004380544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,12,2,128,1,float16,float16,0,0.014858666807413101
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,12,12,128,1,float16,float16,0,0.0173333336909612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,12,4,128,1,float16,float16,0,0.016751999656359356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,fp8,0,0.01431999976436297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,fp8,0,0.022170667846997578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,fp8,0,0.018394666413466137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,12,12,128,1,float16,float16,0,0.010773333410422007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,fp8,0,0.015423999478419622
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,12,1,128,1,float16,float16,0,0.009296000003814697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,fp8,0,0.013424000392357508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,fp8,0,0.013541333377361298
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,fp8,0,0.015471999843915304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,12,2,128,1,float16,float16,0,0.009408000235756239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,fp8,0,0.014949332922697067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,12,12,128,1,float16,float16,0,0.009205333267649015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,12,4,128,1,float16,float16,0,0.010405333091815313
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,12,1,128,1,float16,float16,0,0.008645333349704742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,12,4,128,1,float16,float16,0,0.00903466654320558
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,12,12,128,1,float16,float16,0,0.008656000097592672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,12,2,128,1,float16,float16,0,0.008672000219424566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,fp8,0,0.012538666526476542
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,12,2,128,1,float16,float16,0,0.008496000121037165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,fp8,0,0.012730666746695837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,12,1,128,1,float16,float16,0,0.008512000242869059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,fp8,0,0.012565333396196365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,fp8,0,0.012725333372751871
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,12,4,128,1,float16,float16,0,0.0084906667470932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,12,12,128,1,float16,float16,0,0.008576000109314919
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,12,1,128,1,float16,float16,0,0.008389333263039589
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,12,2,128,1,float16,float16,0,0.008282666405042013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,12,4,128,1,float16,float16,0,0.008341333518425623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,fp8,0,0.012362666428089142
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,fp8,0,0.012448000411192576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,fp8,0,0.012576000144084295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,12,12,128,1,float16,float16,0,0.06453866759936015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,12,1,128,1,float16,float16,0,0.097653329372406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,12,2,128,1,float16,float16,0,0.10570666193962097
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,fp8,0,0.03510399907827377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,fp8,0,0.10690666238466899
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,12,4,128,1,float16,float16,0,0.12088533242543538
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,fp8,0,0.05643733342488607
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,fp8,0,0.10124267141024272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,12,1,128,1,float16,float16,0,0.051589335004488625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,12,2,128,1,float16,float16,0,0.0551146666208903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,fp8,0,0.024959998826185863
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,12,4,128,1,float16,float16,0,0.06277333199977875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,fp8,0,0.0313226655125618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,12,12,128,1,float16,float16,0,0.036144000788529716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,fp8,0,0.05678399900595347
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,fp8,0,0.055104002356529236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,12,1,128,1,float16,float16,0,0.029616000751654308
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,fp8,0,0.020400000115235645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,12,2,128,1,float16,float16,0,0.0316746657093366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,12,12,128,1,float16,float16,0,0.020432000358899433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,fp8,0,0.02256533255179723
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,fp8,0,0.031136001149813335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,12,1,128,1,float16,float16,0,0.017221332838137943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,12,4,128,1,float16,float16,0,0.035301332672437034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,fp8,0,0.01783466711640358
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,fp8,0,0.03660800059636434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,12,2,128,1,float16,float16,0,0.017978666971127193
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,12,4,128,1,float16,float16,0,0.019882666567961376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,12,12,128,1,float16,float16,0,0.014090667168299357
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,fp8,0,0.019205333044131596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,fp8,0,0.022448000808556873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,12,1,128,1,float16,float16,0,0.012714666624863943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,12,2,128,1,float16,float16,0,0.012901333471139273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,fp8,0,0.01803733284274737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,12,12,128,1,float16,float16,0,0.008997333546479544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,12,1,128,1,float16,float16,0,0.008586666857202848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,12,4,128,1,float16,float16,0,0.013701333353916803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,fp8,0,0.02032533288002014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,fp8,0,0.018735999862353008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,fp8,0,0.017429333180189133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,12,2,128,1,float16,float16,0,0.008576000109314919
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,fp8,0,0.017514667163292568
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,12,4,128,1,float16,float16,0,0.008938666433095932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,12,12,128,1,float16,float16,0,0.008416000132759413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,fp8,0,0.01670933390657107
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,12,2,128,1,float16,float16,0,0.008266666904091835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,12,1,128,1,float16,float16,0,0.008250666782259941
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,fp8,0,0.01637866720557213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,12,4,128,1,float16,float16,0,0.008341333518425623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,fp8,0,0.016970666746298473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,12,1,128,1,float16,float16,0,0.008063999935984612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,fp8,0,0.016565332810084026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,fp8,0,0.016399999459584553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,12,12,128,1,float16,float16,0,0.00814933329820633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,12,2,128,1,float16,float16,0,0.00810666692753633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,12,4,128,1,float16,float16,0,0.008047999814152718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,12,12,128,1,float16,float16,0,0.008266666904091835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,12,1,128,1,float16,float16,0,0.008037333066264788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,fp8,0,0.017077332983414333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,12,2,128,1,float16,float16,0,0.008026666939258575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,12,4,128,1,float16,float16,0,0.008127999802430471
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,fp8,0,0.016437333077192307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,fp8,0,0.016832000265518825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,12,12,128,1,float16,float16,0,0.05409066875775655
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,12,2,128,1,float16,float16,0,0.09470400214195251
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,12,1,128,1,float16,float16,0,0.09072533249855042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,fp8,0,0.026608000199000042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,fp8,0,0.044122666120529175
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,12,4,128,1,float16,float16,0,0.10211732983589172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,fp8,0,0.0788320004940033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,fp8,0,0.08463467160860698
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,12,1,128,1,float16,float16,0,0.04781333108743032
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,fp8,0,0.021002667645613354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,12,2,128,1,float16,float16,0,0.049584001302719116
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,fp8,0,0.024298667907714844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,12,4,128,1,float16,float16,0,0.05340266724427541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,12,12,128,1,float16,float16,0,0.03062933435042699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,fp8,0,0.04297066728274027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,12,1,128,1,float16,float16,0,0.0276053324341774
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,fp8,0,0.04678933322429657
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,12,2,128,1,float16,float16,0,0.02828266719977061
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,fp8,0,0.017984000345071156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,fp8,0,0.019626667102177937
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,12,4,128,1,float16,float16,0,0.030159999926884968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,fp8,0,0.023669332265853882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,12,12,128,1,float16,float16,0,0.01759999990463257
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,fp8,0,0.017077332983414333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,12,2,128,1,float16,float16,0,0.016282666474580765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,fp8,0,0.031850665807724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,12,1,128,1,float16,float16,0,0.016271999726692837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,12,4,128,1,float16,float16,0,0.01727466657757759
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,12,12,128,1,float16,float16,0,0.012549333274364471
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,fp8,0,0.018122666825850803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,fp8,0,0.01883200059334437
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,12,1,128,1,float16,float16,0,0.012154666086037954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,fp8,0,0.01661866654952367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,fp8,0,0.01758933315674464
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,12,2,128,1,float16,float16,0,0.012223999947309494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,12,4,128,1,float16,float16,0,0.012410666793584824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,fp8,0,0.016714667280515034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,12,1,128,1,float16,float16,0,0.008341333518425623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,12,12,128,1,float16,float16,0,0.0085333331177632
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,fp8,0,0.01714133347074191
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,12,2,128,1,float16,float16,0,0.008223999912540117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,fp8,0,0.016613333175579708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,12,4,128,1,float16,float16,0,0.008346666892369589
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,12,1,128,1,float16,float16,0,0.008090666805704435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,fp8,0,0.01616000011563301
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,12,12,128,1,float16,float16,0,0.008069333309928576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,12,2,128,1,float16,float16,0,0.008021333565314611
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,fp8,0,0.016805333395799
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,fp8,0,0.01618133361140887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,12,12,128,1,float16,float16,0,0.008000000069538752
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,12,1,128,1,float16,float16,0,0.00786666696270307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,12,4,128,1,float16,float16,0,0.008058666562040647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,12,2,128,1,float16,float16,0,0.007871999715765318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,fp8,0,0.016773333152135212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,fp8,0,0.016762666404247284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,12,4,128,1,float16,float16,0,0.007957333077987036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,fp8,0,0.01628799984852473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,fp8,0,0.01695999999841054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,12,12,128,1,float16,float16,0,0.007994666695594788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,12,2,128,1,float16,float16,0,0.007754666730761528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,fp8,0,0.016399999459584553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,12,1,128,1,float16,float16,0,0.00786666696270307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,fp8,0,0.016864000509182613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,12,4,128,1,float16,float16,0,0.00790933333337307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,fp8,0,0.016901332885026932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,12,12,128,1,float16,float16,0,0.043824002146720886
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,fp8,0,0.04075733323891958
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,12,1,128,1,float16,float16,0,0.013487999637921652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,12,2,128,1,float16,float16,0,0.025050667424996693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,12,4,128,1,float16,float16,0,0.039290666580200195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,0,0.021407999098300934
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,0,0.03291733314593633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,12,1,128,1,float16,float16,0,0.009599999835093817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,0,0.0124746672809124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,12,12,128,1,float16,float16,0,0.02550933261712392
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,12,2,128,1,float16,float16,0,0.017157333592573803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,fp8,0,0.02586666742960612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,0,0.014442666123310724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,12,4,128,1,float16,float16,0,0.024288001159826916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,0,0.020527999848127365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,12,1,128,1,float16,float16,0,0.00914666677514712
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,12,2,128,1,float16,float16,0,0.013061333447694778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,0,0.011941333611806234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,12,4,128,1,float16,float16,0,0.016656000167131424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,0,0.0120319997270902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,12,12,128,1,float16,float16,0,0.015061333775520325
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,0,0.01569066693385442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,12,1,128,1,float16,float16,0,0.00890666681031386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,0,0.011354666203260422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,fp8,0,0.019215999792019527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,12,2,128,1,float16,float16,0,0.01259200026591619
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,0,0.01166933278242747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,12,4,128,1,float16,float16,0,0.012847999731699625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,0,0.011653333902359009
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,12,12,128,1,float16,float16,0,0.01146666705608368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,12,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,0,0.011461333682139715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,12,2,128,1,float16,float16,0,0.012319999436537424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,fp8,0,0.013679999858140945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,12,4,128,1,float16,float16,0,0.012527999778588613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,0,0.011338666081428528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,12,12,128,1,float16,float16,0,0.008026666939258575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,0,0.011440000186363855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,12,1,128,1,float16,float16,0,0.008789333204428354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,12,2,128,1,float16,float16,0,0.012058666596810022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,fp8,0,0.013365333278973898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,12,4,128,1,float16,float16,0,0.01228800043463707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,12,12,128,1,float16,float16,0,0.007914666707317034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,12,1,128,1,float16,float16,0,0.008592000231146812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,12,2,128,1,float16,float16,0,0.012058666596810022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,0,0.011087999989589056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,12,4,128,1,float16,float16,0,0.012181332955757776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,0,0.01138666644692421
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,12,12,128,1,float16,float16,0,0.00761600024998188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,12,1,128,1,float16,float16,0,0.008570666735370954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,12,2,128,1,float16,float16,0,0.011957333733638128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,0,0.010853332777818045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,12,4,128,1,float16,float16,0,0.012128000458081564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,12,12,128,1,float16,float16,0,0.007658666620651881
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,0,0.010821333775917688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,12,1,128,1,float16,float16,0,0.008458666503429413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,12,2,128,1,float16,float16,0,0.008693333094318708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,0,0.011168000598748526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,12,4,128,1,float16,float16,0,0.008650666723648706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,fp8,0,0.011349332829316458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,0,0.010853332777818045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,fp8,0,2.6754719416300454
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,fp8,0,7.53877321879069
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,fp8,0,1.4943733215332031
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,8,8,128,1,float16,float16,0,14.200026194254557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,fp8,0,3.905381202697754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,8,2,128,1,float16,float16,0,12.49462890625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,8,1,128,1,float16,float16,0,12.636224110921225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,8,2,128,1,float16,float16,0,26.421712239583332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,8,1,128,1,float16,float16,0,26.39904022216797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,8,4,128,1,float16,float16,0,27.47992451985677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,fp8,0,29.626693725585938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,28.195897420247395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,8,4,128,1,float16,float16,0,13.755994160970053
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,fp8,0,0.7798293431599935
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,8,8,128,1,float16,float16,0,6.438490549723308
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,fp8,0,1.9512426058451335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,8,1,128,1,float16,float16,0,6.182320276896159
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,8,8,128,1,float16,float16,0,12.976266225179037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,fp8,0,14.760885874430338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,8,2,128,1,float16,float16,0,6.321333567301433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,8,4,128,1,float16,float16,0,6.308949152628581
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,8,8,128,1,float16,float16,0,3.0649706522623696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,14.48086929321289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,fp8,0,0.39129066467285156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,fp8,0,6.9429066975911455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,8,8,128,1,float16,float16,0,6.569450378417969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,8,1,128,1,float16,float16,0,3.062191963195801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,fp8,0,1.147648016611735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,8,2,128,1,float16,float16,0,2.925648053487142
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,8,4,128,1,float16,float16,0,2.97818660736084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,6.841264088948567
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,8,8,128,1,float16,float16,0,3.008432070414225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,fp8,0,4.050677299499512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,fp8,0,1.701573371887207
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,14.02395757039388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,6.845365524291992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,fp8,0,4.467647870381673
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,28.356224060058594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,fp8,0,0.9458026885986328
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,8,8,128,1,float16,float16,0,7.529413223266602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,8,1,128,1,float16,float16,0,6.189280192057292
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,8,1,128,1,float16,float16,0,15.237412770589193
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,8,2,128,1,float16,float16,0,15.373167673746744
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,fp8,0,2.4046452840169272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,8,2,128,1,float16,float16,0,7.023295720418294
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,8,4,128,1,float16,float16,0,15.77782948811849
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,fp8,0,16.68004862467448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,8,8,128,1,float16,float16,0,3.6758079528808594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,8,4,128,1,float16,float16,0,7.268501281738281
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,16.39916229248047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,fp8,0,0.5069493452707926
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,fp8,0,8.266976038614908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,8,8,128,1,float16,float16,0,7.548271814982097
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,fp8,0,1.3095893065134685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,8,1,128,1,float16,float16,0,3.3221972783406577
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,8,2,128,1,float16,float16,0,3.468496004740397
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,8,4,128,1,float16,float16,0,3.634373346964518
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,7.9713490804036455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,8,8,128,1,float16,float16,0,1.8233332633972168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,8,8,128,1,float16,float16,0,3.418373425801595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,fp8,0,4.406277338663737
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,fp8,0,0.2728640039761861
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,8,1,128,1,float16,float16,0,1.7604692776997883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,fp8,0,0.8317600091298422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,8,2,128,1,float16,float16,0,1.7891626358032227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,8,4,128,1,float16,float16,0,1.607792059580485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,16.006800333658855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,4.251349449157715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,8,8,128,1,float16,float16,0,1.678127924601237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,fp8,0,2.417994658152262
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,8.415781021118164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,4.046309471130371
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,fp8,0,3.190090815226237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,fp8,0,1.306010643641154
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,fp8,0,0.6368373235066732
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,8,8,128,1,float16,float16,0,5.3940480550130205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,8,1,128,1,float16,float16,0,4.887866655985515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,8,1,128,1,float16,float16,0,11.15402094523112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,fp8,0,1.8105066617329915
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,8,2,128,1,float16,float16,0,10.482992172241211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,8,4,128,1,float16,float16,0,11.791306813557943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,8,2,128,1,float16,float16,0,4.332725207010905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,fp8,0,11.95370101928711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,8,8,128,1,float16,float16,0,2.658362706502279
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,8,4,128,1,float16,float16,0,5.098335901896159
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,8,8,128,1,float16,float16,0,4.813007990519206
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,11.540938059488932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,fp8,0,0.33721601963043213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,fp8,0,5.719104131062825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,8,1,128,1,float16,float16,0,2.273066679636637
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,fp8,0,1.0902559757232666
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,8,2,128,1,float16,float16,0,2.462048053741455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,5.453360239664714
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,8,4,128,1,float16,float16,0,2.398266633351644
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,8,8,128,1,float16,float16,0,2.4567626317342124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,8,8,128,1,float16,float16,0,1.2997386455535889
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,fp8,0,2.862565358479818
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,fp8,0,0.2118826707204183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,fp8,0,0.5882773399353027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,8,2,128,1,float16,float16,0,1.1663306554158528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,8,1,128,1,float16,float16,0,1.2238720258076985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,8,4,128,1,float16,float16,0,1.1392213503519695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,2.7644373575846353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,8,8,128,1,float16,float16,0,1.190608024597168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,5.401957194010417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,11.165114084879557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,fp8,0,1.5782079696655273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,fp8,0,1.8369065920511882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,2.630570729573568
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,fp8,0,4.160266558329265
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,fp8,0,0.9424213568369547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,8,8,128,1,float16,float16,0,7.022111892700195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,8,1,128,1,float16,float16,0,6.621813456217448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,fp8,0,2.331658681233724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,8,1,128,1,float16,float16,0,14.451642354329428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,8,2,128,1,float16,float16,0,6.374026616414388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,8,2,128,1,float16,float16,0,14.66144053141276
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,8,4,128,1,float16,float16,0,14.984500885009766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,8,4,128,1,float16,float16,0,6.817189534505208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,fp8,0,15.58844248453776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,15.237412770589193
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,fp8,0,0.5041280190149943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,8,8,128,1,float16,float16,0,3.6685867309570312
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,8,1,128,1,float16,float16,0,2.983706792195638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,fp8,0,7.471354802449544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,fp8,0,1.2229599952697754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,8,8,128,1,float16,float16,0,7.043018976847331
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,8,2,128,1,float16,float16,0,3.1505867640177407
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,8,4,128,1,float16,float16,0,3.1510985692342124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,7.2974294026692705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,8,8,128,1,float16,float16,0,1.6343413988749187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,fp8,0,0.2692213257153829
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,8,8,128,1,float16,float16,0,3.1440747578938804
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,8,1,128,1,float16,float16,0,1.6634079615275066
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,fp8,0,3.701653480529785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,fp8,0,0.5940693219502767
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,3.4939521153767905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,8,2,128,1,float16,float16,0,1.398373285929362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,8,4,128,1,float16,float16,0,1.7008320490519206
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,8,8,128,1,float16,float16,0,0.843989372253418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,8,8,128,1,float16,float16,0,1.6537920633951824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,fp8,0,1.9799466133117676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,6.164143880208333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,fp8,0,0.14780267079671225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,8,1,128,1,float16,float16,0,0.7738666534423828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,8,2,128,1,float16,float16,0,0.8042186896006266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,1.776693344116211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,fp8,0,0.42191465695699054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,14.857967376708984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,3.27071475982666
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,8,4,128,1,float16,float16,0,0.8312640190124512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,fp8,0,1.0998613039652507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,8,8,128,1,float16,float16,0,0.7795466581980387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,1.7323625882466633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,fp8,0,1.2327199776967366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,fp8,0,2.589248021443685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,8,8,128,1,float16,float16,0,3.7549012502034507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,8,1,128,1,float16,float16,0,7.864575703938802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,8,1,128,1,float16,float16,0,3.206970532735189
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,fp8,0,0.6822613080342611
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,8,2,128,1,float16,float16,0,7.890517552693685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,8,4,128,1,float16,float16,0,8.175135930379232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,fp8,0,1.442837397257487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,fp8,0,8.867221196492514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,8.60916264851888
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,8,2,128,1,float16,float16,0,3.786890665690104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,8,4,128,1,float16,float16,0,3.3681812286376953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,8,8,128,1,float16,float16,0,1.949610710144043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,fp8,0,4.252053260803223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,8,8,128,1,float16,float16,0,4.095610618591309
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,fp8,0,0.2895626624425252
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,8,1,128,1,float16,float16,0,1.7643359502156575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,fp8,0,0.7224640051523844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,8,2,128,1,float16,float16,0,1.6638132731119792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,3.928442637125651
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,8,4,128,1,float16,float16,0,1.8983252843221028
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,8,8,128,1,float16,float16,0,0.938202699025472
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,fp8,0,2.133413314819336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,8,8,128,1,float16,float16,0,1.8794293403625488
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,fp8,0,0.16645333170890808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,8,1,128,1,float16,float16,0,0.9443840185801188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,fp8,0,0.4036426544189453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,8,2,128,1,float16,float16,0,0.8142186800638834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,8,4,128,1,float16,float16,0,0.9788106282552084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,8,8,128,1,float16,float16,0,0.9745759963989258
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,1.962165355682373
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,8,8,128,1,float16,float16,0,0.46158401171366376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,fp8,0,1.1838613351186116
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,7.970933278401692
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,8,1,128,1,float16,float16,0,0.45637333393096924
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,fp8,0,0.11693867047627766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,fp8,0,0.2617493271827698
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,8,2,128,1,float16,float16,0,0.46249600251515705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,1.1773173014322917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,1.8894987106323242
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,3.6468480428059897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,8,4,128,1,float16,float16,0,0.5806293487548828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,fp8,0,0.7139840126037598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,8,8,128,1,float16,float16,0,0.47250668207804364
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,1.0828639666239421
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,fp8,0,1.4695679346720378
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,fp8,0,2.7445812225341797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,8,8,128,1,float16,float16,0,3.6530720392862954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,fp8,0,0.7354400157928467
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,8,1,128,1,float16,float16,0,3.0548585255940757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,8,2,128,1,float16,float16,0,7.4575144449869795
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,8,1,128,1,float16,float16,0,7.08183479309082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,8,4,128,1,float16,float16,0,7.3114878336588545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,fp8,0,1.4091253280639648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,fp8,0,7.57261339823405
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,8.251248041788736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,8,2,128,1,float16,float16,0,3.1348158518473306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,fp8,0,0.3402239878972371
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,8,4,128,1,float16,float16,0,3.373173395792643
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,8,8,128,1,float16,float16,0,1.9356320699055989
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,8,1,128,1,float16,float16,0,1.8316532770792644
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,fp8,0,4.091658592224121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,8,8,128,1,float16,float16,0,3.6170454025268555
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,fp8,0,0.7596373558044434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,3.663658777872721
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,8,2,128,1,float16,float16,0,1.4839946428934734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,8,4,128,1,float16,float16,0,1.622085412343343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,8,8,128,1,float16,float16,0,0.9418240388234457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,8,8,128,1,float16,float16,0,1.8230986595153809
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,fp8,0,1.8991360664367676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,8,1,128,1,float16,float16,0,0.7330346902211508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,fp8,0,0.1272053321202596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,8,2,128,1,float16,float16,0,0.7797653675079346
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,fp8,0,0.38284798463185626
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,1.8431305885314941
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,8,4,128,1,float16,float16,0,0.8410346508026123
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,8,8,128,1,float16,float16,0,0.9018133481343588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,fp8,0,1.0522613525390625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,7.935781478881836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,3.5277013778686523
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,8,1,128,1,float16,float16,0,0.4050399859746297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,8,8,128,1,float16,float16,0,0.44511465231577557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,fp8,0,0.07982400059700012
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,8,2,128,1,float16,float16,0,0.4083893299102783
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.9761760234832764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,8,4,128,1,float16,float16,0,0.49004801114400226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,fp8,0,0.2448319991429647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,1.7665492693583171
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,8,8,128,1,float16,float16,0,0.2485919992129008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,fp8,0,0.6160266796747843
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,8,8,128,1,float16,float16,0,0.4438933531443278
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,8,1,128,1,float16,float16,0,0.25338133176167804
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,fp8,0,0.0775786687930425
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,8,2,128,1,float16,float16,0,0.25059733788172406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.915450652440389
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,fp8,0,0.1301866670449575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.5872480074564616
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,8,4,128,1,float16,float16,0,0.2477653423945109
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,fp8,0,0.3893653154373169
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,8,8,128,1,float16,float16,0,0.2600906689961751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.5801440080006918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,fp8,0,1.854490598042806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,fp8,0,1.0117333730061848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,8,2,128,1,float16,float16,0,3.5514774322509766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,8,8,128,1,float16,float16,0,2.37391996383667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,8,1,128,1,float16,float16,0,3.335354804992676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,8,4,128,1,float16,float16,0,4.013317426045735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,8,1,128,1,float16,float16,0,1.700160026550293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,fp8,0,0.5124586820602417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,fp8,0,4.658714612325032
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,8,2,128,1,float16,float16,0,1.779402732849121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,fp8,0,0.9065706729888916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,4.419413248697917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,8,4,128,1,float16,float16,0,2.052069346110026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,8,8,128,1,float16,float16,0,1.2059146563212078
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,fp8,0,2.256106694539388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,8,8,128,1,float16,float16,0,2.2737654050191245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,fp8,0,0.1497119963169098
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,8,1,128,1,float16,float16,0,0.9960160255432129
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,fp8,0,0.43263999621073407
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,2.04146671295166
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,8,2,128,1,float16,float16,0,0.9389973481496176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,8,4,128,1,float16,float16,0,1.0164426962534587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,fp8,0,1.1610986391703289
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,8,8,128,1,float16,float16,0,0.5691893498102824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,8,8,128,1,float16,float16,0,1.121893326441447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,4.318112055460612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,8,1,128,1,float16,float16,0,0.45203200976053876
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,1.0733919938405354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,fp8,0,0.09966933727264404
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,8,2,128,1,float16,float16,0,0.45227734247843426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,fp8,0,0.23493866125742593
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,2.0015467007954917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,8,4,128,1,float16,float16,0,0.5202773412068685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,8,8,128,1,float16,float16,0,0.5858186483383179
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,fp8,0,0.624783992767334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,8,8,128,1,float16,float16,0,0.27145065863927204
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,8,1,128,1,float16,float16,0,0.2574399908383687
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.6231786807378134
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,1.0750292936960857
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,fp8,0,0.16145599881807962
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,8,4,128,1,float16,float16,0,0.2685119907061259
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,8,2,128,1,float16,float16,0,0.27591999371846515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,fp8,0,0.39291199048360187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,fp8,0,0.05994666616121928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,8,8,128,1,float16,float16,0,0.1591200033823649
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,8,1,128,1,float16,float16,0,0.15050666530927023
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.33589335282643634
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,fp8,0,0.058778668443361916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.5887680053710938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,8,8,128,1,float16,float16,0,0.27162667115529376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,8,4,128,1,float16,float16,0,0.16090133786201477
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,8,2,128,1,float16,float16,0,0.15377066532770792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,fp8,0,0.09846400221188863
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,fp8,0,0.29052799940109253
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,8,8,128,1,float16,float16,0,0.16178133090337118
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.3314133286476135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,8,8,128,1,float16,float16,0,2.46397336324056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,fp8,0,1.258618672688802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,8,1,128,1,float16,float16,0,3.3265867233276367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,8,4,128,1,float16,float16,0,4.00814946492513
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,fp8,0,0.5905760129292806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,fp8,0,2.0559840202331543
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,4.181119918823242
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,fp8,0,4.667658805847168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,8,1,128,1,float16,float16,0,1.682645320892334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,8,2,128,1,float16,float16,0,3.607733408610026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,8,2,128,1,float16,float16,0,1.8690986633300781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,fp8,0,1.0527520179748535
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,8,8,128,1,float16,float16,0,1.226090669631958
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,8,4,128,1,float16,float16,0,2.0327253341674805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,8,8,128,1,float16,float16,0,2.4058400789896646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,4.160890579223633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,fp8,0,0.2532586654027303
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,8,1,128,1,float16,float16,0,0.8137333393096924
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,8,4,128,1,float16,float16,0,1.0047893524169922
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,fp8,0,2.4349973996480307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,fp8,0,0.45958932240804035
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,8,2,128,1,float16,float16,0,0.9369440078735352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,2.062725385030111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,fp8,0,1.171674648920695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,8,1,128,1,float16,float16,0,0.4290879964828491
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,8,8,128,1,float16,float16,0,0.6141226689020792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,8,8,128,1,float16,float16,0,1.182207981745402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,fp8,0,0.08057599763075511
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,8,2,128,1,float16,float16,0,0.4452799956003825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,fp8,0,0.21205333868662515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,1.0682079792022705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,8,4,128,1,float16,float16,0,0.5326079924901327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,8,8,128,1,float16,float16,0,0.5934400161107382
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,2.084906737009684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,fp8,0,0.6369706789652506
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,8,1,128,1,float16,float16,0,0.23014932870864868
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,fp8,0,0.04901333153247833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,1.0819786389668782
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,8,2,128,1,float16,float16,0,0.23702933390935263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,8,8,128,1,float16,float16,0,0.26571200291315716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,fp8,0,0.1123306651910146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.5157920122146606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,8,4,128,1,float16,float16,0,0.250602662563324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,8,8,128,1,float16,float16,0,0.2645813425381978
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,fp8,0,0.3258773287137349
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,8,8,128,1,float16,float16,0,0.14315199851989746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,8,1,128,1,float16,float16,0,0.1320373316605886
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,fp8,0,0.04230933388074239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,8,2,128,1,float16,float16,0,0.13532267014185587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.32081600030263263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.5007839997609457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,8,4,128,1,float16,float16,0,0.14334400494893393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,fp8,0,0.07600533465544383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,8,8,128,1,float16,float16,0,0.14287466804186502
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,fp8,0,0.19697600603103638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,8,8,128,1,float16,float16,0,0.09558399518330891
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.15706132849057516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,fp8,0,0.03982933362325033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,8,1,128,1,float16,float16,0,0.09063466389973958
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.32172266642252606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,8,2,128,1,float16,float16,0,0.09014933307965596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,fp8,0,0.06861866513888042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,8,4,128,1,float16,float16,0,0.09453866879145305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,8,8,128,1,float16,float16,0,0.09428800145785014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,fp8,0,0.12966400384902954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.15826666355133057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,fp8,0,0.8879466851552328
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,8,4,128,1,float16,float16,0,2.5838613510131836
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,8,8,128,1,float16,float16,0,1.6012372970581055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,8,1,128,1,float16,float16,0,2.0976853370666504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,fp8,0,3.088597297668457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,fp8,0,1.3941334088643391
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,8,2,128,1,float16,float16,0,2.1189279556274414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,8,1,128,1,float16,float16,0,0.9839466412862142
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,8,2,128,1,float16,float16,0,1.0576266447703044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,fp8,0,0.4275466601053874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,fp8,0,0.6514773368835449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,2.6439040501912436
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,8,4,128,1,float16,float16,0,1.250069300333659
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,8,8,128,1,float16,float16,0,1.5432960192362468
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,8,1,128,1,float16,float16,0,0.5109386841456095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,fp8,0,1.4833706219991047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,8,8,128,1,float16,float16,0,0.7441759904225668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,fp8,0,0.10419199864069621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,8,4,128,1,float16,float16,0,0.6363199949264526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,1.2893653710683186
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,fp8,0,0.2938399910926819
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,8,8,128,1,float16,float16,0,0.7480213642120361
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,2.6801331837972007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,8,2,128,1,float16,float16,0,0.559338649113973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,8,8,128,1,float16,float16,0,0.3770453135172526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,8,1,128,1,float16,float16,0,0.2494879961013794
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,1.280992031097412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.6487733523050944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,fp8,0,0.7185440063476562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,8,2,128,1,float16,float16,0,0.2852693398793538
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,fp8,0,0.062650665640831
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,8,4,128,1,float16,float16,0,0.30587732791900635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,fp8,0,0.3614773352940877
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,8,8,128,1,float16,float16,0,0.3798453410466512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,fp8,0,0.1306666632493337
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,8,8,128,1,float16,float16,0,0.1693120002746582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,fp8,0,0.03933866570393244
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,8,1,128,1,float16,float16,0,0.14946666359901428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.645637313524882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,8,2,128,1,float16,float16,0,0.15405333042144775
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,8,4,128,1,float16,float16,0,0.16608533263206482
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,fp8,0,0.08998933434486389
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,8,8,128,1,float16,float16,0,0.16588266690572104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.32786667346954346
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,fp8,0,0.22497600317001343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,8,8,128,1,float16,float16,0,0.09585600097974141
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,8,1,128,1,float16,float16,0,0.0883733332157135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,8,4,128,1,float16,float16,0,0.09973866740862529
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,8,2,128,1,float16,float16,0,0.0913813312848409
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.19722133874893188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,fp8,0,0.03448000053564707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,fp8,0,0.15541332960128784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.32464534044265747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,fp8,0,0.057722667853037514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,8,8,128,1,float16,float16,0,0.09588799873987834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,8,8,128,1,float16,float16,0,0.055215999484062195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,8,1,128,1,float16,float16,0,0.05005866785844167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,fp8,0,0.031301334500312805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.14964266618092856
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.20624534289042154
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,8,4,128,1,float16,float16,0,0.05401599903901418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,fp8,0,0.053445334235827126
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,8,2,128,1,float16,float16,0,0.051776001850763954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,fp8,0,0.09830400347709656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.1491200029850006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,8,8,128,1,float16,float16,0,0.05528533458709717
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,8,8,128,1,float16,float16,0,1.6657600402832031
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,fp8,0,1.1719733079274495
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,2.829888025919596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,8,1,128,1,float16,float16,0,1.9028533299763997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,8,1,128,1,float16,float16,0,0.9501653512318929
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,fp8,0,1.665141264597575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,fp8,0,0.5917173226674398
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,8,2,128,1,float16,float16,0,2.095461368560791
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,8,2,128,1,float16,float16,0,1.0557226339975994
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,fp8,0,0.8078347047170004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,8,4,128,1,float16,float16,0,1.2845226923624675
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,8,4,128,1,float16,float16,0,2.559903939565023
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,fp8,0,3.2379252115885415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,8,8,128,1,float16,float16,0,0.8361492951711019
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,8,8,128,1,float16,float16,0,1.6773653030395508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,fp8,0,1.6299893061319988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,2.819354693094889
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,8,2,128,1,float16,float16,0,0.5517653226852417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,1.3433067003885906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,fp8,0,0.36268266042073566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,fp8,0,0.20240533351898193
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,1.3449546496073406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,8,1,128,1,float16,float16,0,0.4854133526484172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,8,8,128,1,float16,float16,0,0.8453439871470133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,8,4,128,1,float16,float16,0,0.6786239941914877
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,fp8,0,0.750490665435791
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,8,2,128,1,float16,float16,0,0.26994667450586957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,fp8,0,0.058133333921432495
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,8,1,128,1,float16,float16,0,0.2445440093676249
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,8,8,128,1,float16,float16,0,0.41601065794626874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,fp8,0,0.13104533155759177
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.6530986626942953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,8,4,128,1,float16,float16,0,0.33850133419036865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,8,8,128,1,float16,float16,0,0.17272533973058066
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,8,1,128,1,float16,float16,0,0.13014400005340576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,8,8,128,1,float16,float16,0,0.4115413427352905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,fp8,0,0.408400019009908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.3020053307215373
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,fp8,0,0.03583466758330663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,fp8,0,0.0743999977906545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,8,4,128,1,float16,float16,0,0.15610667069753012
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.6711306571960449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,8,2,128,1,float16,float16,0,0.1479146679242452
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,fp8,0,0.19991467396418253
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,8,8,128,1,float16,float16,0,0.16940265893936157
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,8,8,128,1,float16,float16,0,0.09108266234397888
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.18343466520309448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,fp8,0,0.02939733366171519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,fp8,0,0.04685866832733154
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,8,2,128,1,float16,float16,0,0.08475733796755473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.30587732791900635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,fp8,0,0.12124266227086385
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,8,1,128,1,float16,float16,0,0.08180800080299377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,8,8,128,1,float16,float16,0,0.09174399574597676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,8,4,128,1,float16,float16,0,0.09316800038019817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,8,8,128,1,float16,float16,0,0.05117866893609365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,fp8,0,0.02571200082699458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,fp8,0,0.041093334555625916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,8,2,128,1,float16,float16,0,0.04675200084845225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.114656001329422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,8,1,128,1,float16,float16,0,0.044549331068992615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.18634666999181113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,8,4,128,1,float16,float16,0,0.05050133168697357
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,fp8,0,0.07468266785144806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,8,8,128,1,float16,float16,0,0.05090666810671488
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.1143893301486969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,8,1,128,1,float16,float16,0,0.027087998886903126
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,fp8,0,0.03884266565243403
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.07515733440717061
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,8,8,128,1,float16,float16,0,0.03001066545645396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,fp8,0,0.05376533170541128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,8,2,128,1,float16,float16,0,0.027850667635599773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,8,4,128,1,float16,float16,0,0.02978666623433431
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,fp8,0,0.05723733206590017
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,8,8,128,1,float16,float16,0,0.030080000559488933
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.07460799813270569
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,fp8,0,1.161679983139038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,fp8,0,1.5704372723897297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,8,2,128,1,float16,float16,0,1.8858346939086914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,2.045738697052002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,8,1,128,1,float16,float16,0,1.6970507303873699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,8,8,128,1,float16,float16,0,1.6145599683125813
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,8,1,128,1,float16,float16,0,0.6118773221969604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,8,4,128,1,float16,float16,0,2.3815147082010903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,fp8,0,0.5404373407363892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,fp8,0,2.461989402770996
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,8,4,128,1,float16,float16,0,1.0728800296783447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,8,8,128,1,float16,float16,0,0.7902613480885824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,8,2,128,1,float16,float16,0,0.75764266649882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,fp8,0,1.2307679653167725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,fp8,0,0.7226400375366211
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,2.043722629547119
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,8,1,128,1,float16,float16,0,0.3125493327776591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,8,4,128,1,float16,float16,0,0.5318559805552164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,fp8,0,0.19035732746124268
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,8,2,128,1,float16,float16,0,0.38226131598154706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,8,8,128,1,float16,float16,0,1.6099146207173665
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,fp8,0,0.288917342821757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,8,8,128,1,float16,float16,0,0.3574506839116414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.951626698176066
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.45273598035176593
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,fp8,0,0.04739200075467428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,8,8,128,1,float16,float16,0,0.7973386446634928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.9473493099212646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,8,4,128,1,float16,float16,0,0.24920000632603964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,fp8,0,0.5462933381398519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,8,2,128,1,float16,float16,0,0.17405333121617636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,fp8,0,0.09967999656995137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.19814399878184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,8,1,128,1,float16,float16,0,0.08094933132330577
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,8,8,128,1,float16,float16,0,0.12396799524625142
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.4530986547470093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,8,8,128,1,float16,float16,0,0.36113067468007404
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,8,2,128,1,float16,float16,0,0.091839998960495
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,fp8,0,0.029648000995318096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,fp8,0,0.2921440005302429
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,fp8,0,0.05039466420809428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,8,8,128,1,float16,float16,0,0.059338668982187905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,8,8,128,1,float16,float16,0,0.11597866813341777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,fp8,0,0.13827733198801676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,8,1,128,1,float16,float16,0,0.045279999574025474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,8,4,128,1,float16,float16,0,0.10645332932472229
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.1227839986483256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.2039146622021993
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,fp8,0,0.03378133227427801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,fp8,0,0.022629333039124806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,8,2,128,1,float16,float16,0,0.05089066425959269
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,8,1,128,1,float16,float16,0,0.14778666694959006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,8,4,128,1,float16,float16,0,0.057904000083605446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,fp8,0,0.07285333176453908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,8,8,128,1,float16,float16,0,0.03425599883000056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.06412800153096516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,8,1,128,1,float16,float16,0,0.02828799933195114
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.12249066432317098
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,8,2,128,1,float16,float16,0,0.030266667405764263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,fp8,0,0.018986667195955913
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,fp8,0,0.02762666592995326
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,8,4,128,1,float16,float16,0,0.03401600072781245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,8,8,128,1,float16,float16,0,0.034272000193595886
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,fp8,0,0.047839999198913574
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,8,8,128,1,float16,float16,0,0.020389333367347717
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,8,8,128,1,float16,float16,0,0.05875733494758606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.06486399968465169
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.05358933409055074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,8,1,128,1,float16,float16,0,0.017450666675964992
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,8,2,128,1,float16,float16,0,0.01836799954374631
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,fp8,0,0.03268266717592875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,8,4,128,1,float16,float16,0,0.02038399999340375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,8,8,128,1,float16,float16,0,0.020389333367347717
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,fp8,0,0.04074133435885111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,fp8,0,0.04418666660785675
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.05384533107280731
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,8,8,128,1,float16,float16,0,0.01777600000301997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.0322080006202062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,8,1,128,1,float16,float16,0,0.016506666938463848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,8,2,128,1,float16,float16,0,0.016565332810084026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,fp8,0,0.02548266698916753
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,8,4,128,1,float16,float16,0,0.017770666629076004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,fp8,0,0.02495466669400533
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,8,8,128,1,float16,float16,0,0.017722666263580322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,fp8,0,0.02757866680622101
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.032218667368094124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,8,1,128,1,float16,float16,0,0.49641064802805585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,8,8,128,1,float16,float16,0,0.8022826512654623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,fp8,0,0.5356106758117676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,8,2,128,1,float16,float16,0,0.7183466752370199
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.756981372833252
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,8,4,128,1,float16,float16,0,1.0498613516489665
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,8,1,128,1,float16,float16,0,0.2334559957186381
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,fp8,0,0.7312320073445638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,fp8,0,1.1799466609954834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,8,2,128,1,float16,float16,0,0.31721067428588867
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,fp8,0,0.2026346723238627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,fp8,0,0.27753599484761554
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.35260268052419025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,8,4,128,1,float16,float16,0,0.4919840097427368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.7559040387471517
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,8,1,128,1,float16,float16,0,0.10823466380437215
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,fp8,0,0.04248533149560293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,8,8,128,1,float16,float16,0,0.8018613656361898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,fp8,0,0.47229333718617755
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,8,2,128,1,float16,float16,0,0.13659733533859253
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,8,8,128,1,float16,float16,0,0.3619893391927083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,fp8,0,0.0788266658782959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,fp8,0,0.23542932669321695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.14615466197331747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,8,8,128,1,float16,float16,0,0.09981333216031392
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,8,4,128,1,float16,float16,0,0.20881066719690958
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,8,8,128,1,float16,float16,0,0.3641226689020793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,fp8,0,0.026496000587940216
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,fp8,0,0.04123199979464213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,8,2,128,1,float16,float16,0,0.06705066561698914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,8,4,128,1,float16,float16,0,0.08124266564846039
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.3518986701965332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,8,8,128,1,float16,float16,0,0.045194665590922035
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,fp8,0,0.10468799869219463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.08891733487447102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,8,1,128,1,float16,float16,0,0.05845333139101664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,8,1,128,1,float16,float16,0,0.03311466674009959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,8,8,128,1,float16,float16,0,0.10008533795674641
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,8,2,128,1,float16,float16,0,0.03719466676314672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.14777066310246786
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,8,4,128,1,float16,float16,0,0.044682666659355164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,fp8,0,0.027189334233601887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,fp8,0,0.052383999029795326
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,8,8,128,1,float16,float16,0,0.026714667677879333
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,8,8,128,1,float16,float16,0,0.04499199986457825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.08816533287366231
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,8,2,128,1,float16,float16,0,0.022517333428064983
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.0412266676624616
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,fp8,0,0.01599466676513354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,8,1,128,1,float16,float16,0,0.02080533280968666
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,fp8,0,0.02090666691462199
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,8,4,128,1,float16,float16,0,0.02643200010061264
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,fp8,0,0.019359999646743137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,8,8,128,1,float16,float16,0,0.026698666314284008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,fp8,0,0.034304000437259674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,8,1,128,1,float16,float16,0,0.012693333129088083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,8,8,128,1,float16,float16,0,0.015583999454975128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.041306667029857635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.04267199834187826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,fp8,0,0.029509333272775013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,8,2,128,1,float16,float16,0,0.01350933313369751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,fp8,0,0.034074666599432625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,8,4,128,1,float16,float16,0,0.01552533358335495
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,8,8,128,1,float16,float16,0,0.015674666812022526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,fp8,0,0.03745600084463755
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,8,8,128,1,float16,float16,0,0.012837332983811697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.04266666869322459
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,8,1,128,1,float16,float16,0,0.011776000261306763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,fp8,0,0.022128000855445862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.02757866680622101
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,8,4,128,1,float16,float16,0,0.012752000242471695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,fp8,0,0.025263999899228413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,8,2,128,1,float16,float16,0,0.011834666132926941
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,fp8,0,0.02677333354949951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,8,8,128,1,float16,float16,0,0.011509332805871964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,8,8,128,1,float16,float16,0,0.012954667210578918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.027621333797772724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,8,2,128,1,float16,float16,0,0.011253333340088526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,8,1,128,1,float16,float16,0,0.011114666859308878
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.02186666677395503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,8,4,128,1,float16,float16,0,0.011605333536863327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,fp8,0,0.021546666820844013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,fp8,0,0.018405333161354065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,fp8,0,0.021562665700912476
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,8,8,128,1,float16,float16,0,0.011503999431928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.021903999149799347
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,8,1,128,1,float16,float16,0,0.23062400023142496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,8,8,128,1,float16,float16,0,0.38997332255045575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,8,2,128,1,float16,float16,0,0.31519466638565063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,fp8,0,0.19420266151428223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,fp8,0,0.27452800671259564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,8,4,128,1,float16,float16,0,0.48892800013224286
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,fp8,0,0.4649813175201416
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.2967519958813985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,8,2,128,1,float16,float16,0,0.11382933457692464
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,fp8,0,0.0425546665986379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,fp8,0,0.068271999557813
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,8,8,128,1,float16,float16,0,0.0876639982064565
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,8,1,128,1,float16,float16,0,0.08661333719889323
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,8,8,128,1,float16,float16,0,0.3619786500930786
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,8,4,128,1,float16,float16,0,0.19559999306996664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,fp8,0,0.21556266148885092
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.11854933698972066
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,8,1,128,1,float16,float16,0,0.04673600196838379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,8,2,128,1,float16,float16,0,0.054832001527150474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,8,8,128,1,float16,float16,0,0.07827199995517731
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,8,4,128,1,float16,float16,0,0.06991466879844666
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,fp8,0,0.08155199885368347
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,fp8,0,0.026863999664783478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.29875733455022174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,8,8,128,1,float16,float16,0,0.039461334546407066
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.11896533767382304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.06166933476924896
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,8,1,128,1,float16,float16,0,0.027402666707833607
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,fp8,0,0.01931200052301089
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,8,2,128,1,float16,float16,0,0.03134933362404505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,8,4,128,1,float16,float16,0,0.038693333665529885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,fp8,0,0.038245332737763725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,8,8,128,1,float16,float16,0,0.0391146664818128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,8,8,128,1,float16,float16,0,0.02235200007756551
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,fp8,0,0.04550399879614512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.06177600224812826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.03429333368937174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,8,1,128,1,float16,float16,0,0.016517333686351776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,fp8,0,0.017717332889636356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,fp8,0,0.016010666886965435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,8,2,128,1,float16,float16,0,0.01828266680240631
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,8,4,128,1,float16,float16,0,0.022250667214393616
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,8,8,128,1,float16,float16,0,0.022687998910744984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,fp8,0,0.024186665813128155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,fp8,0,0.027632000545660656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,8,8,128,1,float16,float16,0,0.013162666310866674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,8,1,128,1,float16,float16,0,0.010330666477481524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.02195200075705846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,fp8,0,0.015018666783968607
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,8,4,128,1,float16,float16,0,0.01328533391157786
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,fp8,0,0.014005333185195923
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,fp8,0,0.021104000508785248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,8,8,128,1,float16,float16,0,0.013232000172138214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,8,2,128,1,float16,float16,0,0.01137599969903628
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.03432533393303553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.02204799900452296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,fp8,0,0.013327999661366144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,8,1,128,1,float16,float16,0,0.00922133338948091
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.01869333287080129
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,8,8,128,1,float16,float16,0,0.010522666076819101
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,8,2,128,1,float16,float16,0,0.011050666371981302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,8,4,128,1,float16,float16,0,0.010213333492477735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,fp8,0,0.01340266689658165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,fp8,0,0.017866666118303936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,8,8,128,1,float16,float16,0,0.009066666786869368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.018687999496857326
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,8,8,128,1,float16,float16,0,0.010533332824707031
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,8,1,128,1,float16,float16,0,0.008725333337982496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,8,2,128,1,float16,float16,0,0.008746666833758354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,8,4,128,1,float16,float16,0,0.009061333412925402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,8,8,128,1,float16,float16,0,0.009050666665037474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.016437333077192307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,fp8,0,0.016442666451136272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,8,8,128,1,float16,float16,0,0.008693333094318708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,8,1,128,1,float16,float16,0,0.008592000231146812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.01651200031240781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.01629866659641266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,8,4,128,1,float16,float16,0,0.008736000085870424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,fp8,0,0.012784000486135483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,8,2,128,1,float16,float16,0,0.008559999987483025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,8,8,128,1,float16,float16,0,0.00860799973209699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,fp8,0,0.016010666886965435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.016421332955360413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,8,8,128,1,float16,float16,0,0.0757173349459966
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,8,1,128,1,float16,float16,0,0.08052266637484233
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,fp8,0,0.042165334026018776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,8,2,128,1,float16,float16,0,0.10716799894968669
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,fp8,0,0.06741333504517873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,8,4,128,1,float16,float16,0,0.19661333163579306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,fp8,0,0.20963199933369955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.09170132875442505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,8,1,128,1,float16,float16,0,0.04438399771849314
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,8,2,128,1,float16,float16,0,0.052501335740089417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,fp8,0,0.026426665484905243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,fp8,0,0.037818667789300285
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,8,4,128,1,float16,float16,0,0.0670666644970576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,fp8,0,0.07381866872310638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,8,8,128,1,float16,float16,0,0.07555200159549713
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,8,8,128,1,float16,float16,0,0.037178667883078255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.04907733201980591
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,8,1,128,1,float16,float16,0,0.025087999800841015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,8,2,128,1,float16,float16,0,0.028954667349656422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.09152000149091084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,8,4,128,1,float16,float16,0,0.03655466685692469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,fp8,0,0.02426133304834366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,fp8,0,0.04010133445262909
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.027957332630952198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,8,8,128,1,float16,float16,0,0.03695466617743174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,8,8,128,1,float16,float16,0,0.021568000316619873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.04919999837875366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,fp8,0,0.015925332903862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,8,2,128,1,float16,float16,0,0.016895999511082966
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,8,4,128,1,float16,float16,0,0.020917333662509918
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,8,8,128,1,float16,float16,0,0.021301334102948506
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,fp8,0,0.017743999759356182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,8,8,128,1,float16,float16,0,0.012815999488035837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,fp8,0,0.02500266581773758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.018245333184798557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.027642667293548584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,8,1,128,1,float16,float16,0,0.015546667079130808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,fp8,0,0.014058666924635569
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,8,4,128,1,float16,float16,0,0.01251199965675672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,8,1,128,1,float16,float16,0,0.009935999910036722
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,fp8,0,0.014837333311637243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,8,2,128,1,float16,float16,0,0.010698666175206503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,fp8,0,0.018021332720915478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,8,8,128,1,float16,float16,0,0.012805332740147909
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,8,8,128,1,float16,float16,0,0.010437333335479101
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,fp8,0,0.013199999928474426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,8,1,128,1,float16,float16,0,0.009056000038981438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,8,2,128,1,float16,float16,0,0.009194666519761086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.018229333062966663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,8,4,128,1,float16,float16,0,0.010255999863147736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,fp8,0,0.013370666652917862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.01462399959564209
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,fp8,0,0.014815999815861383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,8,8,128,1,float16,float16,0,0.010330666477481524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,8,1,128,1,float16,float16,0,0.008512000242869059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,fp8,0,0.012666666259368261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.012741333494583765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,8,8,128,1,float16,float16,0,0.009093333035707474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,8,2,128,1,float16,float16,0,0.008559999987483025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,8,4,128,1,float16,float16,0,0.008858666444818178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.014773332824309668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,8,8,128,1,float16,float16,0,0.008853333070874214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,8,1,128,1,float16,float16,0,0.008496000121037165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,fp8,0,0.01250133290886879
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,8,8,128,1,float16,float16,0,0.008576000109314919
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.012624000509579977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,8,4,128,1,float16,float16,0,0.00847999999920527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,fp8,0,0.012597333639860153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,8,2,128,1,float16,float16,0,0.00847999999920527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,8,8,128,1,float16,float16,0,0.0084906667470932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,8,8,128,1,float16,float16,0,0.008341333518425623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,8,1,128,1,float16,float16,0,0.008309333274761835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,8,2,128,1,float16,float16,0,0.008325333396593729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.012357333054145178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,8,4,128,1,float16,float16,0,0.008314666648705801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,fp8,0,0.012432000289360682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,fp8,0,0.012181332955757776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,fp8,0,0.012522666404644648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.012479999413092932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,8,8,128,1,float16,float16,0,0.008352000266313553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,8,8,128,1,float16,float16,0,0.05077866713205973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,8,1,128,1,float16,float16,0,0.06912533442179362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,fp8,0,0.031173333525657654
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,8,2,128,1,float16,float16,0,0.07798400024573009
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,fp8,0,0.05287466446558634
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,8,4,128,1,float16,float16,0,0.09227200349171956
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,fp8,0,0.10269866387049358
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,8,1,128,1,float16,float16,0,0.03870933254559835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.07764266431331635
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,8,2,128,1,float16,float16,0,0.04251199960708618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,fp8,0,0.023557332654794056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,fp8,0,0.029370665550231934
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,8,4,128,1,float16,float16,0,0.05009066561857859
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,fp8,0,0.05508266886075338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,8,8,128,1,float16,float16,0,0.027765333652496338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,8,1,128,1,float16,float16,0,0.021984001000722248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.04257600009441376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,8,8,128,1,float16,float16,0,0.050479998191197716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,fp8,0,0.019850666324297588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.07750399907430013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,8,2,128,1,float16,float16,0,0.02366400013367335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,8,4,128,1,float16,float16,0,0.027376001079877216
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,fp8,0,0.021754667162895203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,fp8,0,0.030213333666324615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,8,8,128,1,float16,float16,0,0.01607999950647354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,8,8,128,1,float16,float16,0,0.02787200113137563
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.042319998145103455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.023434666295846302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,fp8,0,0.017573333034912746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,8,4,128,1,float16,float16,0,0.01602666700879733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,8,1,128,1,float16,float16,0,0.013253333667914072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,8,2,128,1,float16,float16,0,0.01413333291808764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,fp8,0,0.018826667219400406
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,8,8,128,1,float16,float16,0,0.016117333124081295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,fp8,0,0.021877333521842957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,8,1,128,1,float16,float16,0,0.008826666822036108
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,8,8,128,1,float16,float16,0,0.009999999776482582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,8,2,128,1,float16,float16,0,0.008965333302815756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.018976000448067982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,8,4,128,1,float16,float16,0,0.009925333162148794
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,fp8,0,0.017877332866191864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.02362666775782903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,fp8,0,0.01855466639002164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,8,8,128,1,float16,float16,0,0.00996800015370051
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,8,8,128,1,float16,float16,0,0.008767999708652496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,8,1,128,1,float16,float16,0,0.008512000242869059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,fp8,0,0.01722666621208191
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.016677333662907284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.019002666076024372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,8,4,128,1,float16,float16,0,0.008602666358153025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,8,8,128,1,float16,float16,0,0.00873066671192646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,fp8,0,0.01749333366751671
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,8,2,128,1,float16,float16,0,0.008517333616813024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,fp8,0,0.01670933390657107
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,8,8,128,1,float16,float16,0,0.008447999755541483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,8,1,128,1,float16,float16,0,0.008074666683872541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.016917333006858826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,8,2,128,1,float16,float16,0,0.008207999790708223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.01646399994691213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,fp8,0,0.017045332739750545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,8,4,128,1,float16,float16,0,0.008218666538596153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,8,8,128,1,float16,float16,0,0.00808533343176047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,8,1,128,1,float16,float16,0,0.00816000004609426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.016437333077192307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,8,2,128,1,float16,float16,0,0.00797333319981893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,fp8,0,0.016741332908471424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,8,8,128,1,float16,float16,0,0.008154666672150293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,8,4,128,1,float16,float16,0,0.008000000069538752
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.017018667111794155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,fp8,0,0.0169813334941864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,fp8,0,0.0169813334941864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,8,8,128,1,float16,float16,0,0.0080960001796484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,8,1,128,1,float16,float16,0,0.007925333455204964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,fp8,0,0.01658133293191592
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,8,2,128,1,float16,float16,0,0.008080000057816505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,8,8,128,1,float16,float16,0,0.00820266641676426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,8,4,128,1,float16,float16,0,0.008058666562040647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,fp8,0,0.016869333883126576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,8,8,128,1,float16,float16,0,0.008496000121037165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,fp8,0,0.016858667135238647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.01693333312869072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,8,8,128,1,float16,float16,0,0.04205333193143209
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,8,1,128,1,float16,float16,0,0.06378133098284404
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,8,2,128,1,float16,float16,0,0.0677706648906072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,8,4,128,1,float16,float16,0,0.07564266522725423
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,fp8,0,0.025248001019159954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,fp8,0,0.04242666562398275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,fp8,0,0.0755573312441508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.06394133468468984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,8,1,128,1,float16,float16,0,0.03573333223660787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,fp8,0,0.020207999895016353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,8,4,128,1,float16,float16,0,0.0415786678592364
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,8,2,128,1,float16,float16,0,0.037808001041412354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,fp8,0,0.023647998770078022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,fp8,0,0.041375999649365745
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,8,8,128,1,float16,float16,0,0.02325333406527837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,8,8,128,1,float16,float16,0,0.04205866654713949
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,8,1,128,1,float16,float16,0,0.02032533288002014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,fp8,0,0.01764800027012825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.03530666728814443
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,8,2,128,1,float16,float16,0,0.021055998901526134
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.06363733112812042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,8,4,128,1,float16,float16,0,0.023183998962243397
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,8,8,128,1,float16,float16,0,0.013530666629473368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,fp8,0,0.019365333020687103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,8,8,128,1,float16,float16,0,0.023370665808518726
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,fp8,0,0.023039999107519787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.019850666324297588
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.035205334424972534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,fp8,0,0.016869333883126576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,8,2,128,1,float16,float16,0,0.012495999534924826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,8,1,128,1,float16,float16,0,0.012554666648308435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,8,4,128,1,float16,float16,0,0.013610667238632837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,fp8,0,0.018394666413466137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,fp8,0,0.016938666502634685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,8,8,128,1,float16,float16,0,0.013658666362365087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,8,1,128,1,float16,float16,0,0.008421333506703377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.019952000429232914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,fp8,0,0.017360000560681026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,8,8,128,1,float16,float16,0,0.008799999952316284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,fp8,0,0.01646399994691213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,8,2,128,1,float16,float16,0,0.008527999743819237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,8,4,128,1,float16,float16,0,0.008703999842206636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,8,8,128,1,float16,float16,0,0.008634666601816813
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,8,8,128,1,float16,float16,0,0.008298666526873907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,fp8,0,0.01664000004529953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,8,2,128,1,float16,float16,0,0.008101333553592363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.016522667060295742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,fp8,0,0.016970666746298473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,8,1,128,1,float16,float16,0,0.00820266641676426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,8,4,128,1,float16,float16,0,0.00808533343176047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,fp8,0,0.016271999726692837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,8,8,128,1,float16,float16,0,0.007957333077987036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,8,8,128,1,float16,float16,0,0.008303999900817871
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,8,1,128,1,float16,float16,0,0.007877333089709282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,8,2,128,1,float16,float16,0,0.00784533346692721
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.01637866720557213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,fp8,0,0.016927999754746754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,fp8,0,0.01695999999841054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,8,4,128,1,float16,float16,0,0.007978666573762894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,8,8,128,1,float16,float16,0,0.007936000203092894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.016554666062196095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,8,8,128,1,float16,float16,0,0.007887999837597212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,8,1,128,1,float16,float16,0,0.007877333089709282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.016058667252461117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,fp8,0,0.016437333077192307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,8,2,128,1,float16,float16,0,0.007877333089709282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,fp8,0,0.016906666258970898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,fp8,0,0.01695999999841054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,8,8,128,1,float16,float16,0,0.007882666463653246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,8,8,128,1,float16,float16,0,0.007861333588759104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,8,4,128,1,float16,float16,0,0.008000000069538752
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.01684800038735072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,8,1,128,1,float16,float16,0,0.00790933333337307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.01693333312869072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,fp8,0,0.016469333320856094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,8,2,128,1,float16,float16,0,0.008010666817426682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,8,4,128,1,float16,float16,0,0.00784533346692721
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,fp8,0,0.016528000434239704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,8,8,128,1,float16,float16,0,0.007946666950980822
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,fp8,0,0.016821333517630894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.017125333348910015
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,8,8,128,1,float16,float16,0,0.032645332316557564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.03173866619666418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,8,1,128,1,float16,float16,0,0.01333333303531011
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,8,2,128,1,float16,float16,0,0.024634666740894318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,0,0.014655999839305878
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,8,4,128,1,float16,float16,0,0.038805333276589714
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,0,0.02073066681623459
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,0,0.03232000023126602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,8,1,128,1,float16,float16,0,0.009397333487868309
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,0,0.0122079998254776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,8,2,128,1,float16,float16,0,0.016714667280515034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,8,4,128,1,float16,float16,0,0.023818666736284893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,0,0.01421333352724711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,8,8,128,1,float16,float16,0,0.03246400008598963
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,0,0.020197333147128422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,8,8,128,1,float16,float16,0,0.018709332992633183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.0315733328461647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,8,1,128,1,float16,float16,0,0.008997333546479544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.019834666202465694
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,8,2,128,1,float16,float16,0,0.012965332716703415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,0,0.011733333269755045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,0,0.011786667009194693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,8,4,128,1,float16,float16,0,0.016741332908471424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,0,0.013728000223636627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,8,8,128,1,float16,float16,0,0.011498666057984034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,8,1,128,1,float16,float16,0,0.008821333448092142
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.01370666672786077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,8,8,128,1,float16,float16,0,0.018511999398469925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,8,2,128,1,float16,float16,0,0.0124746672809124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.01987733319401741
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,8,4,128,1,float16,float16,0,0.012682666381200155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,8,8,128,1,float16,float16,0,0.007920000081261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,8,8,128,1,float16,float16,0,0.011567999919255575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,0,0.011584000041087469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.013546666751305262
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,8,1,128,1,float16,float16,0,0.00873066671192646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,8,2,128,1,float16,float16,0,0.012117333710193634
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,8,4,128,1,float16,float16,0,0.012351999680201212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,8,8,128,1,float16,float16,0,0.007994666695594788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,0,0.011418666690587997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,8,8,128,1,float16,float16,0,0.007834666719039282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.013376000026861826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,8,1,128,1,float16,float16,0,0.00877333308259646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,8,2,128,1,float16,float16,0,0.012181332955757776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,8,4,128,1,float16,float16,0,0.012149333953857422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,0,0.011178666104873022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,8,8,128,1,float16,float16,0,0.008031999692320824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,8,8,128,1,float16,float16,0,0.007658666620651881
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.013157332936922709
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,8,1,128,1,float16,float16,0,0.008682666967312494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,8,2,128,1,float16,float16,0,0.011952000359694162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,8,4,128,1,float16,float16,0,0.012191999703645706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,0,0.011087999989589056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,8,8,128,1,float16,float16,0,0.007717333113153775
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,8,8,128,1,float16,float16,0,0.007749333356817563
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,0,0.011168000598748526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,8,1,128,1,float16,float16,0,0.008602666358153025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,0,0.010832000523805618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,8,2,128,1,float16,float16,0,0.011930666863918304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,8,4,128,1,float16,float16,0,0.012047999848922094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,8,8,128,1,float16,float16,0,0.007658666620651881
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,8,8,128,1,float16,float16,0,0.007589333380262057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,8,1,128,1,float16,float16,0,0.008405333384871483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.011952000359694162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,0,0.010751999914646149
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.011402666568756104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,8,2,128,1,float16,float16,0,0.008570666735370954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,8,4,128,1,float16,float16,0,0.008517333616813024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,0,0.010794666906197866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,8,8,128,1,float16,float16,0,0.00761600024998188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,fp8,0,3.69816525777181
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,4,1,128,1,float16,float16,0,5.971482594807942
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,4,4,128,1,float16,float16,0,6.347077051798503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,fp8,0,1.9416906038920085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,4,2,128,1,float16,float16,0,6.360256195068359
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,4,2,128,1,float16,float16,0,13.525472005208334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,4,1,128,1,float16,float16,0,13.656939188639322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,14.293317159016928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,4,4,128,1,float16,float16,0,6.6843522389729815
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,fp8,0,14.470896402994791
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,fp8,0,7.147866566975911
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,4,4,128,1,float16,float16,0,3.473775863647461
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,fp8,0,1.3743252754211426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,4,1,128,1,float16,float16,0,3.251797358194987
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,4,2,128,1,float16,float16,0,3.124805450439453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,4,4,128,1,float16,float16,0,1.509328047434489
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,4,4,128,1,float16,float16,0,3.047941207885742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,fp8,0,3.8543148040771484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,fp8,0,0.6185439825057983
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,4,1,128,1,float16,float16,0,1.527173360188802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,6.892234802246094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,4,2,128,1,float16,float16,0,1.618607997894287
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,3.632261276245117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,4,4,128,1,float16,float16,0,1.4579520225524902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,14.470714569091797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,fp8,0,2.143082618713379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,6.925605138142903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,fp8,0,2.1710400581359863
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,3.428154627482096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,4,4,128,1,float16,float16,0,3.30403200785319
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,fp8,0,1.2322293122609456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,4,1,128,1,float16,float16,0,3.053589185078939
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,4,1,128,1,float16,float16,0,7.14460817972819
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,4,2,128,1,float16,float16,0,7.632309595743815
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,4,2,128,1,float16,float16,0,3.506938616434733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,fp8,0,8.03933334350586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,7.963386535644531
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,fp8,0,3.9411147435506186
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,4,4,128,1,float16,float16,0,3.4895413716634116
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,4,4,128,1,float16,float16,0,1.9264532725016277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,fp8,0,0.7751573721567789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,4,2,128,1,float16,float16,0,1.6500746409098308
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,4,1,128,1,float16,float16,0,1.6122239430745442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,4,4,128,1,float16,float16,0,1.8291093508402507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,fp8,0,2.3445706367492676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,4,4,128,1,float16,float16,0,0.9375572999318441
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,4.117450714111328
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,4,1,128,1,float16,float16,0,0.8663733005523682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,fp8,0,0.4636213382085164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,4,2,128,1,float16,float16,0,0.8718187014261881
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,2.0387253761291504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,8.104143778483072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,3.7975893020629883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,4,4,128,1,float16,float16,0,0.8843680222829183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,fp8,0,1.5315839449564617
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,1.9937493006388347
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,4,4,128,1,float16,float16,0,2.410858631134033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,4,1,128,1,float16,float16,0,2.1118720372517905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,fp8,0,0.8137333393096924
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,fp8,0,1.6060214042663574
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,4,1,128,1,float16,float16,0,4.553034782409668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,4,2,128,1,float16,float16,0,4.639498710632324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,4,2,128,1,float16,float16,0,2.2508692741394043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,5.356805165608724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,fp8,0,5.619024276733398
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,4,4,128,1,float16,float16,0,2.5208266576131186
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,4,4,128,1,float16,float16,0,1.4851999282836914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,fp8,0,2.8856159845987954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,fp8,0,0.6627626816431681
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,4,1,128,1,float16,float16,0,1.2601386706034343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,4,2,128,1,float16,float16,0,1.2926026980082195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,4,4,128,1,float16,float16,0,1.3665760358174641
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,fp8,0,1.5656959215799968
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,4,4,128,1,float16,float16,0,0.6823093096415201
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,3.0806452433268228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,4,1,128,1,float16,float16,0,0.6742080052693685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,fp8,0,0.35895466804504395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,4,2,128,1,float16,float16,0,0.6472640037536621
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,1.6070399284362793
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,4.905445416768392
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,2.7592426935831704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,fp8,0,1.058789332707723
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,4,4,128,1,float16,float16,0,0.6604213317235311
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,1.5412480036417644
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,fp8,0,2.259610652923584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,fp8,0,1.093125343322754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,4,4,128,1,float16,float16,0,3.214816093444824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,4,1,128,1,float16,float16,0,2.7568321228027344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,4,1,128,1,float16,float16,0,6.491952260335286
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,4,2,128,1,float16,float16,0,2.8538827896118164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,4,2,128,1,float16,float16,0,6.7357972462972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,6.28001594543457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,fp8,0,3.361178716023763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,fp8,0,7.697866439819336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,4,4,128,1,float16,float16,0,3.4702345530192056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,4,4,128,1,float16,float16,0,1.8509279886881511
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,fp8,0,0.6998986403147379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,4,1,128,1,float16,float16,0,1.67194668451945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,4,2,128,1,float16,float16,0,1.4511946042378743
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,4,4,128,1,float16,float16,0,0.8670026461283366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,4,4,128,1,float16,float16,0,1.7094772656758626
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,4,1,128,1,float16,float16,0,0.7677493095397949
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,3.7014506657918296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,fp8,0,0.4304693142573039
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,fp8,0,1.9322400093078613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,7.20248540242513
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,1.9024373690287273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,4,2,128,1,float16,float16,0,0.8239626884460449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,4,4,128,1,float16,float16,0,0.86572265625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,fp8,0,1.0384426911671956
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,fp8,0,0.2651253342628479
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,4,1,128,1,float16,float16,0,0.44816001256306964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,3.632015864054362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,4,4,128,1,float16,float16,0,0.4556373357772827
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,4,2,128,1,float16,float16,0,0.45053335030873615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,1.8220000267028809
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,fp8,0,0.725109338760376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,1.1481386820475261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,4,4,128,1,float16,float16,0,0.47114133834838867
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,1.039632002512614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,fp8,0,1.3231200377146404
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,fp8,0,0.6900266806284586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,4,1,128,1,float16,float16,0,1.6137439409891765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,4,4,128,1,float16,float16,0,2.009258588155111
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,4,1,128,1,float16,float16,0,3.211695988972982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,4,2,128,1,float16,float16,0,3.3929386138916016
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,4,2,128,1,float16,float16,0,1.729866663614909
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,3.862911860148112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,fp8,0,3.9917866388956704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,4,4,128,1,float16,float16,0,1.8802612622578938
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,fp8,0,2.0696159998575845
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,4,4,128,1,float16,float16,0,1.1946720282236736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,fp8,0,0.44919999440511066
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,4,1,128,1,float16,float16,0,0.8718400001525879
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,2.009727954864502
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,4,2,128,1,float16,float16,0,0.9298453330993652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,fp8,0,1.0861279964447021
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,4,4,128,1,float16,float16,0,0.523413340250651
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,4,4,128,1,float16,float16,0,1.0642613569895427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,fp8,0,0.255237340927124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,4,1,128,1,float16,float16,0,0.4626773198445638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,4.167824109395345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,4,2,128,1,float16,float16,0,0.5039360125859579
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,1.0882399876912434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,fp8,0,0.7524267037709554
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,2.023050626118978
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,4,4,128,1,float16,float16,0,0.4819733301798503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,4,4,128,1,float16,float16,0,0.33715200424194336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,fp8,0,0.1889973282814026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,4,1,128,1,float16,float16,0,0.2801706592241923
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,4,2,128,1,float16,float16,0,0.2970079978307088
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.6877333323160807
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,1.0936319828033447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,4,4,128,1,float16,float16,0,0.30538666248321533
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,fp8,0,0.4668639898300171
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.6459039847056071
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,4,4,128,1,float16,float16,0,1.86190398534139
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,fp8,0,1.4641812642415364
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,4,1,128,1,float16,float16,0,1.5230293273925781
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,4,2,128,1,float16,float16,0,3.1882667541503906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,fp8,0,0.6710293292999268
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,3.6768480936686196
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,4,1,128,1,float16,float16,0,3.1363093058268228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,fp8,0,3.8129920959472656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,4,2,128,1,float16,float16,0,1.741546630859375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,4,4,128,1,float16,float16,0,0.9852586587270101
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,fp8,0,1.9127146402994792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,4,4,128,1,float16,float16,0,1.9144105911254883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,4,1,128,1,float16,float16,0,0.8987733523050944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,fp8,0,0.3423519929250081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,1.9245813687642415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,4,2,128,1,float16,float16,0,0.8422719637552897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,fp8,0,0.9961493015289307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,4,4,128,1,float16,float16,0,0.4795733292897542
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,fp8,0,0.22347732384999594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,3.606250762939453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,4,1,128,1,float16,float16,0,0.42084264755249023
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,4,2,128,1,float16,float16,0,0.42423999309539795
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,4,4,128,1,float16,float16,0,0.9634933471679688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,1.912768046061198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,fp8,0,0.6182986497879028
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,4,4,128,1,float16,float16,0,0.4701013167699178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.9879626433054606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,4,1,128,1,float16,float16,0,0.2445919911066691
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,fp8,0,0.14667200048764548
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.602234681447347
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,4,2,128,1,float16,float16,0,0.25169066588083905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,4,4,128,1,float16,float16,0,0.2613333264986674
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,1.0539573033650715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,fp8,0,0.37863465150197345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,4,4,128,1,float16,float16,0,0.1788960099220276
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,4,1,128,1,float16,float16,0,0.15317866206169128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.363375981648763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,4,2,128,1,float16,float16,0,0.17244267463684082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,fp8,0,0.12527466813723245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,4,4,128,1,float16,float16,0,0.26312534014383954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,fp8,0,0.24226133028666177
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,4,4,128,1,float16,float16,0,0.16941867272059122
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.5790719985961914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.34256001313527423
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,4,4,128,1,float16,float16,0,1.1869386831919353
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,4,1,128,1,float16,float16,0,0.9133493105570475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,fp8,0,0.42045867443084717
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,2.128368059794108
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,4,2,128,1,float16,float16,0,2.0378506978352866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,fp8,0,0.9054186344146729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,fp8,0,2.4030240376790366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,4,2,128,1,float16,float16,0,0.9962453047434489
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,4,1,128,1,float16,float16,0,1.8522079785664876
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,4,4,128,1,float16,float16,0,0.639141321182251
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,fp8,0,1.1688533624013264
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,fp8,0,0.20972800254821777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,4,1,128,1,float16,float16,0,0.4515519936879476
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,4,4,128,1,float16,float16,0,1.233898639678955
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,2.1081652641296387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,fp8,0,0.6062026818593343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,4,2,128,1,float16,float16,0,0.4793866475423177
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,1.1929492950439453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,4,1,128,1,float16,float16,0,0.26387733221054077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,4,4,128,1,float16,float16,0,0.626746654510498
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,4,4,128,1,float16,float16,0,0.2786239981651306
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,4,2,128,1,float16,float16,0,0.27049599091211957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,fp8,0,0.1616426706314087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.6265066862106323
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,fp8,0,0.38607998689015705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,1.1722613175710042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,4,4,128,1,float16,float16,0,0.16951467593510947
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,4,4,128,1,float16,float16,0,0.2860906720161438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,4,1,128,1,float16,float16,0,0.15473600228627524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,fp8,0,0.09814400474230449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.3452800114949544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.6010133425394694
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,4,2,128,1,float16,float16,0,0.15663466850916544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,4,4,128,1,float16,float16,0,0.17146666844685873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,fp8,0,0.2881919940312703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,4,4,128,1,float16,float16,0,0.108106662829717
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.34350399176279706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.2874506711959839
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,4,2,128,1,float16,float16,0,0.10403733452161153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,fp8,0,0.09355200330416362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,4,1,128,1,float16,float16,0,0.10022399822870891
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,4,4,128,1,float16,float16,0,0.10785599549611409
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,fp8,0,0.18042133251825967
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.2916906674702962
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,4,4,128,1,float16,float16,0,1.2452747027079265
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,2.141002655029297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,4,1,128,1,float16,float16,0,0.866042693456014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,fp8,0,1.048319975535075
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,fp8,0,0.4548746744791667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,4,1,128,1,float16,float16,0,1.832090695699056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,4,2,128,1,float16,float16,0,0.9586239655812582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,4,2,128,1,float16,float16,0,2.025183995564779
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,fp8,0,1.1635573705037434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,fp8,0,2.450592041015625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,4,4,128,1,float16,float16,0,0.6486506859461466
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,4,4,128,1,float16,float16,0,1.2506133715311687
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,2.148810704549154
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,fp8,0,0.1868799924850464
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,4,4,128,1,float16,float16,0,0.6468213399251302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,4,1,128,1,float16,float16,0,0.4285120169321696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,1.0827253659566243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,4,4,128,1,float16,float16,0,0.3057386676470439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,fp8,0,0.5899519920349121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,1.0935306549072266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,4,1,128,1,float16,float16,0,0.23196266094843546
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.55566934744517
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,fp8,0,0.11317333579063416
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,4,2,128,1,float16,float16,0,0.4768213431040446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,4,2,128,1,float16,float16,0,0.2461386720339457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,4,4,128,1,float16,float16,0,0.29200534025828045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,4,4,128,1,float16,float16,0,0.1557813286781311
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,fp8,0,0.3115626573562622
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.3365973234176636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.560426672299703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,fp8,0,0.07288533449172974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,4,4,128,1,float16,float16,0,0.1560479998588562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,4,1,128,1,float16,float16,0,0.13318399588267008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,fp8,0,0.19218667348225912
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.16725333531697592
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.32790400584538776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,4,4,128,1,float16,float16,0,0.10098666946093242
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,4,2,128,1,float16,float16,0,0.144378662109375
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,4,2,128,1,float16,float16,0,0.0930560032526652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,fp8,0,0.06771199901898702
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,4,1,128,1,float16,float16,0,0.09067199627558391
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,fp8,0,0.12920000155766806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,4,4,128,1,float16,float16,0,0.10081066687901814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,4,4,128,1,float16,float16,0,0.05230399966239929
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.16689066092173258
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,fp8,0,0.06499200065930684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,4,1,128,1,float16,float16,0,0.0473280002673467
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,4,4,128,1,float16,float16,0,0.05277333160241445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,4,2,128,1,float16,float16,0,0.04860800007979075
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,fp8,0,0.1218933363755544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.13006933530171713
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.12839466333389282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,4,4,128,1,float16,float16,0,0.8001706600189209
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,fp8,0,0.2959573268890381
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,4,1,128,1,float16,float16,0,0.53711465994517
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,1.325167973836263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,4,1,128,1,float16,float16,0,1.0735680262247722
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,fp8,0,0.6535626649856567
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,4,4,128,1,float16,float16,0,0.8102826277414957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,4,2,128,1,float16,float16,0,0.64192001024882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,4,4,128,1,float16,float16,0,0.4081546862920125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,fp8,0,0.7073706785837809
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,1.342458724975586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,4,2,128,1,float16,float16,0,1.2186720371246338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.7150506973266602
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,fp8,0,1.4972373644510906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,4,1,128,1,float16,float16,0,0.26359466711680096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,4,4,128,1,float16,float16,0,0.4115840196609497
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,4,2,128,1,float16,float16,0,0.28657066822052
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,fp8,0,0.13201600313186646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,4,4,128,1,float16,float16,0,0.17892799774805704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,fp8,0,0.3184106747309367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,fp8,0,0.09103999535242717
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,4,1,128,1,float16,float16,0,0.15399466951688132
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,fp8,0,0.2047040065129598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,4,4,128,1,float16,float16,0,0.17821866273880005
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.35328535238901776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,4,2,128,1,float16,float16,0,0.16334933042526245
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.21175465981165567
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,fp8,0,0.05715733269850413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,4,4,128,1,float16,float16,0,0.10671466588973999
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.7185973326365153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,4,1,128,1,float16,float16,0,0.09004799524943034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,4,2,128,1,float16,float16,0,0.0965119997660319
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.3572640021642049
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,fp8,0,0.15068266789118448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.21621867020924887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,4,4,128,1,float16,float16,0,0.10781866312026978
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,4,4,128,1,float16,float16,0,0.06052800019582113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,4,2,128,1,float16,float16,0,0.05346666773160299
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,fp8,0,0.053354665637016296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.1556000014146169
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,4,1,128,1,float16,float16,0,0.0517546683549881
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,fp8,0,0.09732799728711446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,4,4,128,1,float16,float16,0,0.06101333101590475
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,4,4,128,1,float16,float16,0,0.04052799940109253
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.15569067001342773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.09779199957847595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,4,2,128,1,float16,float16,0,0.03796799977620443
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,4,1,128,1,float16,float16,0,0.03643733263015747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,4,4,128,1,float16,float16,0,0.04048533240954081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,fp8,0,0.05089599887530009
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,fp8,0,0.0928053359190623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.09764800469080608
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,4,1,128,1,float16,float16,0,0.5077013174692789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,fp8,0,0.34298133850097656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,4,4,128,1,float16,float16,0,0.8760639826456705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,4,2,128,1,float16,float16,0,0.6426719824473063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,fp8,0,0.8368159929911295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,1.3954985936482747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,4,1,128,1,float16,float16,0,1.052565336227417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,4,2,128,1,float16,float16,0,1.2508533000946045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,fp8,0,0.7451626459757487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,fp8,0,1.6500800450642903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,1.3649919827779133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,4,4,128,1,float16,float16,0,0.8668746948242188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.7359039783477783
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,4,2,128,1,float16,float16,0,0.31590932607650757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,4,1,128,1,float16,float16,0,0.261952002843221
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.3461546500523885
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,4,4,128,1,float16,float16,0,0.2014346718788147
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,4,4,128,1,float16,float16,0,0.44839465618133545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,4,4,128,1,float16,float16,0,0.45334935188293457
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.7272586822509766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,fp8,0,0.07154666880766551
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,4,1,128,1,float16,float16,0,0.13432533542315164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,4,2,128,1,float16,float16,0,0.15683733423550925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,4,4,128,1,float16,float16,0,0.10504532853762309
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.19762667020161948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,fp8,0,0.1860640048980713
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,fp8,0,0.3637973467508952
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,4,4,128,1,float16,float16,0,0.18921599785486856
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,fp8,0,0.04619733492533366
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,fp8,0,0.12931199868520102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.35093867778778076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,4,1,128,1,float16,float16,0,0.0855519970258077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,4,2,128,1,float16,float16,0,0.09070932865142822
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,fp8,0,0.11693867047627766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,4,4,128,1,float16,float16,0,0.05840000013510386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,4,4,128,1,float16,float16,0,0.10501333077748616
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,fp8,0,0.041152000427246094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,4,2,128,1,float16,float16,0,0.049685334165891014
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.19595734278361002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,4,4,128,1,float16,float16,0,0.058261334896087646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,fp8,0,0.07216533521811168
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.12190399567286174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.07394133508205414
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,4,4,128,1,float16,float16,0,0.03356266766786575
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,4,2,128,1,float16,float16,0,0.02961066613594691
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,fp8,0,0.03814399987459183
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,4,1,128,1,float16,float16,0,0.02779199928045273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.12172800302505493
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,4,1,128,1,float16,float16,0,0.045653333266576133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,fp8,0,0.06702933212121327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,4,4,128,1,float16,float16,0,0.03374933451414108
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,4,4,128,1,float16,float16,0,0.028944000601768494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.07381866872310638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,4,1,128,1,float16,float16,0,0.02624000112215678
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.049914668003718056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,4,2,128,1,float16,float16,0,0.026842666169007618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,fp8,0,0.044922664761543274
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,4,4,128,1,float16,float16,0,0.028917332490285236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,fp8,0,0.04710400104522705
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.05003199974695841
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,fp8,0,0.2843946615854899
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,4,1,128,1,float16,float16,0,0.3505653142929077
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,4,4,128,1,float16,float16,0,0.8686239719390869
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,4,2,128,1,float16,float16,0,0.5037440061569214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,fp8,0,0.5506293376286825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,fp8,0,0.7785867055257162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.9831626415252686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,4,1,128,1,float16,float16,0,0.9525866508483887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,4,4,128,1,float16,float16,0,0.8146666685740153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,4,2,128,1,float16,float16,0,1.1432106494903564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,4,4,128,1,float16,float16,0,0.39209600289662677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,fp8,0,1.247109333674113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.9837386608123779
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,4,4,128,1,float16,float16,0,0.3923199971516927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.5251146554946899
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,4,1,128,1,float16,float16,0,0.176362673441569
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,fp8,0,0.09697600205739339
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.24118399620056152
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,4,2,128,1,float16,float16,0,0.23269865910212198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,4,1,128,1,float16,float16,0,0.08738666772842407
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,fp8,0,0.048432002464930214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,4,4,128,1,float16,float16,0,0.14362133542696634
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,4,2,128,1,float16,float16,0,0.10514666636784871
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,fp8,0,0.2502080003420512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,fp8,0,0.12416000167528789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,4,4,128,1,float16,float16,0,0.14647466937700906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,4,4,128,1,float16,float16,0,0.07242133220036824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.5230880180994669
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.1415786643822988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.2419253389040629
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,fp8,0,0.06694399813810985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,fp8,0,0.03286933402220408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,4,2,128,1,float16,float16,0,0.05745066702365875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.1402453382809957
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,4,4,128,1,float16,float16,0,0.04160533348719279
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,4,1,128,1,float16,float16,0,0.049135997891426086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.0740586668252945
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,4,1,128,1,float16,float16,0,0.029706666866938274
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,fp8,0,0.02779199928045273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,4,4,128,1,float16,float16,0,0.07291733225186665
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,4,2,128,1,float16,float16,0,0.03377600014209747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,fp8,0,0.045456002155939736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,4,4,128,1,float16,float16,0,0.04128533353408178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.07406933108965556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,fp8,0,0.02498133232196172
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.046869332591692604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,4,4,128,1,float16,float16,0,0.024101334313551586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,4,2,128,1,float16,float16,0,0.020256000260512035
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,fp8,0,0.04031466692686081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,4,4,128,1,float16,float16,0,0.024192000428835552
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,4,4,128,1,float16,float16,0,0.019600000232458115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,4,1,128,1,float16,float16,0,0.017093333105246227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,4,1,128,1,float16,float16,0,0.018277333428462345
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.04685866832733154
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.037461332976818085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,fp8,0,0.0317546675602595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,fp8,0,0.03346133232116699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,4,4,128,1,float16,float16,0,0.01969066634774208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,4,2,128,1,float16,float16,0,0.017658667018016178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.036320000886917114
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.023578666150569916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,4,1,128,1,float16,float16,0,0.01626666635274887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,4,4,128,1,float16,float16,0,0.017231999586025875
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,fp8,0,0.021482666333516438
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,4,2,128,1,float16,float16,0,0.01637866720557213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,4,4,128,1,float16,float16,0,0.017445333302021027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.023605334262053173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,fp8,0,0.02199466774861018
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,4,1,128,1,float16,float16,0,0.12863999605178833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,4,4,128,1,float16,float16,0,0.38045867284138996
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.4216800133387248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,fp8,0,0.07658666869004567
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,4,1,128,1,float16,float16,0,0.2820640007654826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,4,2,128,1,float16,float16,0,0.18919465939203897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,fp8,0,0.2746826608975728
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,4,2,128,1,float16,float16,0,0.47391998767852783
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,fp8,0,0.47067733605702716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,fp8,0,0.19260267416636148
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,4,4,128,1,float16,float16,0,0.37701332569122314
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.1848906675974528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,4,4,128,1,float16,float16,0,0.13782399892807007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,4,1,128,1,float16,float16,0,0.06333866715431213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.4283733367919922
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.18550399939219156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,fp8,0,0.0920906662940979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.10397866368293762
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,4,4,128,1,float16,float16,0,0.05917333563168844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,4,4,128,1,float16,float16,0,0.11708799997965495
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,fp8,0,0.038912000755469
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,4,2,128,1,float16,float16,0,0.08028266827265422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,4,2,128,1,float16,float16,0,0.04417066772778829
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,fp8,0,0.04643733302752177
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,4,4,128,1,float16,float16,0,0.03350399931271871
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,4,4,128,1,float16,float16,0,0.05952000121275584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,fp8,0,0.026202666262785595
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,4,1,128,1,float16,float16,0,0.022096000611782074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.05046933392683665
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,fp8,0,0.020970667401949566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.10433066884676616
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,4,2,128,1,float16,float16,0,0.02587733417749405
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,fp8,0,0.03176533430814743
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,4,1,128,1,float16,float16,0,0.03619199991226196
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,4,4,128,1,float16,float16,0,0.01933866615096728
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,4,4,128,1,float16,float16,0,0.033600000043710075
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.03377600014209747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.05039466420809428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,fp8,0,0.01833600054184596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,4,2,128,1,float16,float16,0,0.015509333461523056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,4,4,128,1,float16,float16,0,0.01934933289885521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,fp8,0,0.026837334036827087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,4,1,128,1,float16,float16,0,0.01198400060335795
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,4,4,128,1,float16,float16,0,0.014720000326633453
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,4,1,128,1,float16,float16,0,0.013658666362365087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.033733333150545754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.02991466720898946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,fp8,0,0.025487999121348064
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,4,2,128,1,float16,float16,0,0.012842666357755661
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,4,4,128,1,float16,float16,0,0.014831999937693277
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,fp8,0,0.026591998835404713
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.02977599948644638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,4,1,128,1,float16,float16,0,0.01146666705608368
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.02332266668478648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,fp8,0,0.02186133215824763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,4,4,128,1,float16,float16,0,0.012479999413092932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,4,2,128,1,float16,float16,0,0.011349332829316458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,fp8,0,0.021551998953024547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,4,4,128,1,float16,float16,0,0.012517333030700684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.023370665808518726
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,4,1,128,1,float16,float16,0,0.011077333241701126
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.019968000551064808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,4,4,128,1,float16,float16,0,0.011541333049535751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,fp8,0,0.019440000255902607
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,fp8,0,0.019573333362738293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,4,4,128,1,float16,float16,0,0.011493333925803503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,4,2,128,1,float16,float16,0,0.01118933285276095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.019674666225910187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,4,1,128,1,float16,float16,0,0.05209066470464071
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,4,4,128,1,float16,float16,0,0.11642666657765706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.15738667050997415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,4,1,128,1,float16,float16,0,0.1021066705385844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,fp8,0,0.06484800080458324
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,4,2,128,1,float16,float16,0,0.17846399545669556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,fp8,0,0.17637866735458374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,fp8,0,0.0356480007370313
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,4,2,128,1,float16,float16,0,0.06888533135255177
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,4,4,128,1,float16,float16,0,0.053114667534828186
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,4,4,128,1,float16,float16,0,0.11617066462834676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,fp8,0,0.06874666611353557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,4,1,128,1,float16,float16,0,0.029824001093705494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.07947200040022533
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,fp8,0,0.023317334552605946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.15660267074902853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,4,2,128,1,float16,float16,0,0.03779733429352442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,4,4,128,1,float16,float16,0,0.029669334491093952
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.04333333174387614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.07970666885375977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,4,4,128,1,float16,float16,0,0.052933335304260254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,fp8,0,0.039450667798519135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,fp8,0,0.01777600000301997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,4,2,128,1,float16,float16,0,0.021712000171343487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,fp8,0,0.02493866781393687
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,4,4,128,1,float16,float16,0,0.016869333883126576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,4,4,128,1,float16,float16,0,0.02924266705910365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,4,1,128,1,float16,float16,0,0.018058666338523228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.026816000541051228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.04347200194994608
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,4,1,128,1,float16,float16,0,0.011274666835864386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,fp8,0,0.015087999403476715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,4,2,128,1,float16,float16,0,0.012986666212479273
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,4,4,128,1,float16,float16,0,0.017050666113694508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,fp8,0,0.020053333292404812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.026901334524154663
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.021136000752449036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,fp8,0,0.013674666484196981
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,4,1,128,1,float16,float16,0,0.009685333197315535
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,4,4,128,1,float16,float16,0,0.01249066616098086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,4,2,128,1,float16,float16,0,0.010533332824707031
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,fp8,0,0.017877332866191864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,4,4,128,1,float16,float16,0,0.0100853331387043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,4,4,128,1,float16,float16,0,0.01232533281048139
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,4,1,128,1,float16,float16,0,0.008816000074148178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.020992000897725422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.017887999614079792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,4,4,128,1,float16,float16,0,0.010122666756312052
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,fp8,0,0.016544000556071598
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,4,1,128,1,float16,float16,0,0.008650666723648706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,4,2,128,1,float16,float16,0,0.009002666920423508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.017653333644072216
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.016501333564519882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,4,2,128,1,float16,float16,0,0.008805333326260248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,4,4,128,1,float16,float16,0,0.008842666943868002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,fp8,0,0.016117333124081295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,4,4,128,1,float16,float16,0,0.009018666421373686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,4,4,128,1,float16,float16,0,0.0084906667470932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,4,1,128,1,float16,float16,0,0.008405333384871483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.016528000434239704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.016176000237464905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,4,2,128,1,float16,float16,0,0.008400000010927519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,fp8,0,0.012725333372751871
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,4,4,128,1,float16,float16,0,0.0086666668454806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,fp8,0,0.019071999937295914
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.016202667107184727
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,4,4,128,1,float16,float16,0,0.051039998730023704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,4,1,128,1,float16,float16,0,0.028058665494124096
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,fp8,0,0.023039999107519787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.06760533154010773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,4,1,128,1,float16,float16,0,0.04987733562787374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,fp8,0,0.03530666728814443
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,4,2,128,1,float16,float16,0,0.06611733138561249
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,fp8,0,0.06117333471775055
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,4,2,128,1,float16,float16,0,0.03594133257865906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,4,4,128,1,float16,float16,0,0.050981332858403526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,fp8,0,0.03425066669782003
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.06743999818960826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,4,4,128,1,float16,float16,0,0.02796799937884013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,4,1,128,1,float16,float16,0,0.016762666404247284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,fp8,0,0.017738666385412216
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.036714665591716766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,4,2,128,1,float16,float16,0,0.02060266708334287
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,4,4,128,1,float16,float16,0,0.01646399994691213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,4,4,128,1,float16,float16,0,0.028069332242012024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.02314666658639908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.036757332583268486
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,fp8,0,0.02242133269707362
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,4,1,128,1,float16,float16,0,0.010826667149861654
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,4,2,128,1,float16,float16,0,0.012426666915416718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,4,4,128,1,float16,float16,0,0.016271999726692837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,4,4,128,1,float16,float16,0,0.01231466606259346
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.023120000958442688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,4,1,128,1,float16,float16,0,0.009445333232482275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,fp8,0,0.013717333475748697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,fp8,0,0.014554666976133982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,4,4,128,1,float16,float16,0,0.012261333564917246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,4,4,128,1,float16,float16,0,0.009957333405812582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,4,2,128,1,float16,float16,0,0.010048000141978264
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.01421333352724711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,4,1,128,1,float16,float16,0,0.009103999783595404
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,4,2,128,1,float16,float16,0,0.008992000172535578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,fp8,0,0.013141332815090815
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.01414399966597557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,4,4,128,1,float16,float16,0,0.008757333581646284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,4,1,128,1,float16,float16,0,0.008469333251317343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,4,4,128,1,float16,float16,0,0.009925333162148794
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,4,2,128,1,float16,float16,0,0.008687999720374743
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,4,4,128,1,float16,float16,0,0.008485333373149237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,4,1,128,1,float16,float16,0,0.008517333616813024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.012453333785136541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,fp8,0,0.01246400053302447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,4,4,128,1,float16,float16,0,0.00884799969693025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,4,2,128,1,float16,float16,0,0.008416000132759413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,4,4,128,1,float16,float16,0,0.008586666857202848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,fp8,0,0.012373333175977072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,4,4,128,1,float16,float16,0,0.00843733362853527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,4,1,128,1,float16,float16,0,0.008325333396593729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.012378666549921036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.012330666184425354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,fp8,0,0.012293333808581034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,4,2,128,1,float16,float16,0,0.008341333518425623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,fp8,0,0.012272000312805176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,4,4,128,1,float16,float16,0,0.008367999767263731
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.012330666184425354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,4,4,128,1,float16,float16,0,0.034602666894594826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,4,1,128,1,float16,float16,0,0.023423999547958374
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.05159999926884969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,4,1,128,1,float16,float16,0,0.04153066625197729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,4,2,128,1,float16,float16,0,0.04966400067011515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,fp8,0,0.028202667832374573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,fp8,0,0.051669334371884666
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,4,2,128,1,float16,float16,0,0.027189334233601887
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,fp8,0,0.021856000026067097
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,fp8,0,0.027514666318893433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,4,4,128,1,float16,float16,0,0.01972266659140587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.028565332293510437
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,4,4,128,1,float16,float16,0,0.03461333364248276
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,4,1,128,1,float16,float16,0,0.01413333291808764
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.051488002141316734
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,4,2,128,1,float16,float16,0,0.01595199977358182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,fp8,0,0.018735999862353008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,4,4,128,1,float16,float16,0,0.0198186660806338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,4,4,128,1,float16,float16,0,0.011877333124478659
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,fp8,0,0.02102400114138921
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.022495999932289124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,fp8,0,0.017082666357358296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.02829866607983907
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,4,1,128,1,float16,float16,0,0.009279999881982803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,4,2,128,1,float16,float16,0,0.009994666402538618
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,fp8,0,0.01826133330663045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,4,4,128,1,float16,float16,0,0.009749333063761393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.021370666722456615
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.01812800019979477
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,4,1,128,1,float16,float16,0,0.008565333361426989
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,4,4,128,1,float16,float16,0,0.01202133297920227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,fp8,0,0.017664000391960144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,4,2,128,1,float16,float16,0,0.00874133345981439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,4,4,128,1,float16,float16,0,0.009674666449427605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,4,4,128,1,float16,float16,0,0.008698666468262672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,fp8,0,0.016762666404247284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.018079999834299088
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,4,1,128,1,float16,float16,0,0.008373333141207695
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,4,2,128,1,float16,float16,0,0.008405333384871483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.016399999459584553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,4,4,128,1,float16,float16,0,0.008661333471536636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,4,4,128,1,float16,float16,0,0.008234666660428047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.016528000434239704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,4,1,128,1,float16,float16,0,0.00816000004609426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,fp8,0,0.016901332885026932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,4,2,128,1,float16,float16,0,0.008069333309928576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,4,4,128,1,float16,float16,0,0.008229333286484083
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,fp8,0,0.01692266638080279
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,4,4,128,1,float16,float16,0,0.0081386665503184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,4,1,128,1,float16,float16,0,0.008010666817426682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,fp8,0,0.016447999825080235
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,4,2,128,1,float16,float16,0,0.008090666805704435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.016837333639462788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,fp8,0,0.016864000509182613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,4,4,128,1,float16,float16,0,0.008058666562040647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,4,4,128,1,float16,float16,0,0.008074666683872541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,4,1,128,1,float16,float16,0,0.008031999692320824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.016783999900023144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,4,2,128,1,float16,float16,0,0.007930666829148928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,fp8,0,0.016528000434239704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,4,4,128,1,float16,float16,0,0.008010666817426682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,fp8,0,0.016506666938463848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.01692266638080279
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,4,4,128,1,float16,float16,0,0.026943999032179516
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.03991466760635376
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,4,1,128,1,float16,float16,0,0.02141333371400833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,4,1,128,1,float16,float16,0,0.037621334195137024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,4,2,128,1,float16,float16,0,0.041296000281969704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,fp8,0,0.02348266790310542
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,fp8,0,0.039103999733924866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,4,2,128,1,float16,float16,0,0.023082666099071503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,fp8,0,0.021984001000722248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,fp8,0,0.019445333629846573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,4,4,128,1,float16,float16,0,0.015423999478419622
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.02252800017595291
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,4,4,128,1,float16,float16,0,0.026933332284291584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,4,1,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.03985599925120672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,4,2,128,1,float16,float16,0,0.01357866699496905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,fp8,0,0.017258666455745697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,fp8,0,0.018351999421914417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,4,4,128,1,float16,float16,0,0.009839999799927076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,4,1,128,1,float16,float16,0,0.00860799973209699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.01833600054184596
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,fp8,0,0.016586666305859882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,4,4,128,1,float16,float16,0,0.01580799991885821
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.0227360005180041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,4,2,128,1,float16,float16,0,0.00867733359336853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,fp8,0,0.016762666404247284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,4,4,128,1,float16,float16,0,0.009797333429257074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,4,4,128,1,float16,float16,0,0.008581333483258883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,4,1,128,1,float16,float16,0,0.008186666915814081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.018309333672126133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,4,2,128,1,float16,float16,0,0.008405333384871483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,fp8,0,0.01735466718673706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.016586666305859882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,4,4,128,1,float16,float16,0,0.00814933329820633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,4,4,128,1,float16,float16,0,0.008559999987483025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,fp8,0,0.016336000214020412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.016410666207472484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,4,1,128,1,float16,float16,0,0.00816000004609426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.016336000214020412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,fp8,0,0.016927999754746754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,4,2,128,1,float16,float16,0,0.008101333553592363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,4,4,128,1,float16,float16,0,0.008277333031098047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,4,4,128,1,float16,float16,0,0.008080000057816505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.01616000011563301
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,4,1,128,1,float16,float16,0,0.007925333455204964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.01699200024207433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,fp8,0,0.016704000532627106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,4,2,128,1,float16,float16,0,0.007893333211541176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,fp8,0,0.01682666689157486
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,4,4,128,1,float16,float16,0,0.007920000081261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.016949333250522614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,4,1,128,1,float16,float16,0,0.00797333319981893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.016906666258970898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,4,4,128,1,float16,float16,0,0.008031999692320824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,4,2,128,1,float16,float16,0,0.007850666840871176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,fp8,0,0.016666666915019352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,4,4,128,1,float16,float16,0,0.007840000092983246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,fp8,0,0.016837333639462788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,4,4,128,1,float16,float16,0,0.008042666440208754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.01695999999841054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,4,1,128,1,float16,float16,0,0.007733333234985669
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,4,2,128,1,float16,float16,0,0.007781333600481351
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.01687466725707054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,fp8,0,0.016528000434239704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,4,4,128,1,float16,float16,0,0.007903999959429106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,fp8,0,0.01640533283352852
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.016879999389251072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,4,4,128,1,float16,float16,0,0.018874666343132656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,4,1,128,1,float16,float16,0,0.009178666397929192
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.01970133309563001
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,0,0.011952000359694162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,4,1,128,1,float16,float16,0,0.013210666676362356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,0,0.01402666668097178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,4,2,128,1,float16,float16,0,0.023999998966852825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,0,0.020303999384244282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,4,2,128,1,float16,float16,0,0.01659199967980385
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,0,0.013818666338920593
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,4,4,128,1,float16,float16,0,0.018906666586796444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,4,4,128,1,float16,float16,0,0.011727999895811081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,4,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.019962667177120846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.01351999988158544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,4,2,128,1,float16,float16,0,0.012661332885424295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,0,0.011434666812419891
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,4,4,128,1,float16,float16,0,0.008037333066264788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.011461333682139715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,4,4,128,1,float16,float16,0,0.011829332758982977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.013487999637921652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,4,1,128,1,float16,float16,0,0.008832000195980072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,4,2,128,1,float16,float16,0,0.012234666695197424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,0,0.011312000453472137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,4,4,128,1,float16,float16,0,0.007957333077987036
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,4,4,128,1,float16,float16,0,0.007807999849319458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,4,1,128,1,float16,float16,0,0.008672000219424566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.011541333049535751
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,4,2,128,1,float16,float16,0,0.01228800043463707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,0,0.011247999966144562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,4,4,128,1,float16,float16,0,0.00785600021481514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,4,4,128,1,float16,float16,0,0.008016000191370646
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,4,1,128,1,float16,float16,0,0.008703999842206636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.01126933346192042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,0,0.011050666371981302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,4,2,128,1,float16,float16,0,0.012085333466529846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,4,4,128,1,float16,float16,0,0.007941333577036858
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,4,4,128,1,float16,float16,0,0.007589333380262057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,4,1,128,1,float16,float16,0,0.008645333349704742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.011120000233252844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,4,2,128,1,float16,float16,0,0.0120319997270902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,4,4,128,1,float16,float16,0,0.007605333502093951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,4,4,128,1,float16,float16,0,0.007621333623925845
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,4,1,128,1,float16,float16,0,0.008602666358153025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,4,2,128,1,float16,float16,0,0.012074666718641916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,4,4,128,1,float16,float16,0,0.007717333113153775
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,0,0.010757333288590113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,4,4,128,1,float16,float16,0,0.007631999750932057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,4,1,128,1,float16,float16,0,0.008597333605090777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,0,0.010911999891201654
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,4,2,128,1,float16,float16,0,0.008639999975760778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,0,0.010816000401973724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,4,4,128,1,float16,float16,0,0.007637333124876022
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,2,2,128,1,float16,float16,0,2.8678665161132812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,2,1,128,1,float16,float16,0,2.918837229410807
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,2,2,128,1,float16,float16,0,2.9651625951131186
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,fp8,0,3.5791358947753906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,2,2,128,1,float16,float16,0,1.489210605621338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,2,1,128,1,float16,float16,0,5.248186747233073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,2,1,128,1,float16,float16,0,1.469802697499593
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,fp8,0,6.883312225341797
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,6.879248301188151
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,2,2,128,1,float16,float16,0,0.8496533234914144
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,3.388586680094401
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,2,2,128,1,float16,float16,0,1.8529173533121746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,fp8,0,2.1769493420918784
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,2,1,128,1,float16,float16,0,0.9302666982014974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,2,2,128,1,float16,float16,0,0.8846027056376139
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,fp8,0,1.1862506866455078
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,2.0549227396647134
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,6.435813268025716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,3.2069066365559897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,2.0189599990844727
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,2,2,128,1,float16,float16,0,1.6878399848937988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,2,1,128,1,float16,float16,0,1.562378724416097
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,2,1,128,1,float16,float16,0,3.1502774556477866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,2,2,128,1,float16,float16,0,0.8846666812896729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,fp8,0,2.2884586652119956
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,2,2,128,1,float16,float16,0,1.6920533180236816
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,fp8,0,3.8921066919962564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,2,1,128,1,float16,float16,0,1.06876802444458
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,3.7040160497029624
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,2,2,128,1,float16,float16,0,0.9211733341217041
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,2.074282646179199
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,3.592778523763021
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,2,1,128,1,float16,float16,0,0.5369760195414225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,fp8,0,1.3436479568481445
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,2,2,128,1,float16,float16,0,0.5668960014979044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,1.3285493055979412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,2,2,128,1,float16,float16,0,0.5315680106480917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,fp8,0,0.8377546469370524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,1.9724799791971843
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,1.1775573094685872
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,2,2,128,1,float16,float16,0,1.2148746649424236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,2,1,128,1,float16,float16,0,1.1074026425679524
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,2,1,128,1,float16,float16,0,2.169648011525472
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,2,2,128,1,float16,float16,0,1.215882698694865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,fp8,0,1.504602591196696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,fp8,0,2.80842653910319
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,2,2,128,1,float16,float16,0,0.6398613452911377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,2.626944065093994
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,2,1,128,1,float16,float16,0,0.6462506850560507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,2,2,128,1,float16,float16,0,0.651248017946879
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,fp8,0,1.0192000071207683
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,2,2,128,1,float16,float16,0,0.4112906853357951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,1.614309310913086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,2.5835253397623696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,2,1,128,1,float16,float16,0,0.3888266483942668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,1.4831892649332683
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,2,2,128,1,float16,float16,0,0.3978240092595418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,fp8,0,0.6080426772435507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.8409919738769531
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.8134187062581381
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,2,2,128,1,float16,float16,0,1.6908106803894043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,2,1,128,1,float16,float16,0,1.4705066680908203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,2,2,128,1,float16,float16,0,1.568069299062093
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,fp8,0,1.8223199844360352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,2,1,128,1,float16,float16,0,3.0361547470092773
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,2,2,128,1,float16,float16,0,0.7755200068155924
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,3.22598934173584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,fp8,0,3.4170986811319985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,2,1,128,1,float16,float16,0,0.7635946273803711
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,2,2,128,1,float16,float16,0,0.4939039945602417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,fp8,0,1.1362933317820232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,2,2,128,1,float16,float16,0,0.7698667049407959
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,1.726842721303304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,2,1,128,1,float16,float16,0,0.44119465351104736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,2,2,128,1,float16,float16,0,0.44386665026346844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,3.293402671813965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,1.752639929453532
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.9933493137359619
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,fp8,0,0.7711359659830729
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.9828960100809733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,2,2,128,1,float16,float16,0,0.2906399965286255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,2,1,128,1,float16,float16,0,0.29763199885686237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,fp8,0,0.48414933681488037
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.6852107048034668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.7424213091532389
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,2,2,128,1,float16,float16,0,0.28516266743342084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,2,2,128,1,float16,float16,0,0.9516639709472656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,2,1,128,1,float16,float16,0,0.912559986114502
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,2,1,128,1,float16,float16,0,1.8035146395365398
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,2,2,128,1,float16,float16,0,0.9581653277079264
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,fp8,0,1.0961600144704182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,fp8,0,2.071669260660807
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,1.9182186126708984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,2,2,128,1,float16,float16,0,0.4917600154876709
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,2,2,128,1,float16,float16,0,0.4830346504847209
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,2,2,128,1,float16,float16,0,0.28810666004816693
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,2,1,128,1,float16,float16,0,0.4703893264134725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,1.0574346383412678
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,1.1275893052419026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,1.9285227457682292
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.6524159908294678
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,fp8,0,0.6866559982299805
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,2,1,128,1,float16,float16,0,0.2853546738624573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,fp8,0,0.47889065742492676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,2,2,128,1,float16,float16,0,0.2854880094528198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,2,2,128,1,float16,float16,0,0.18996800978978476
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.6510346730550131
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.3909173409144084
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,fp8,0,0.33803733189900714
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,2,1,128,1,float16,float16,0,0.17694934209187826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.39211201667785645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,2,2,128,1,float16,float16,0,0.18582399686177573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,2,2,128,1,float16,float16,0,0.9362133344014486
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,2,2,128,1,float16,float16,0,0.9416693051656088
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,2,1,128,1,float16,float16,0,0.8273013432820638
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,1.8445919354756672
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,2,2,128,1,float16,float16,0,0.4461439847946167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,fp8,0,1.0263520081837971
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,1.8662080764770508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,2,1,128,1,float16,float16,0,1.690831979115804
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.9290293057759603
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,fp8,0,1.924458662668864
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,2,1,128,1,float16,float16,0,0.40833067893981934
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,2,2,128,1,float16,float16,0,0.4829759995142619
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,fp8,0,0.5859200159708658
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,2,2,128,1,float16,float16,0,0.26954134305318195
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.9091306527455648
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,2,1,128,1,float16,float16,0,0.24846400817235312
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.6135253508885702
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,2,2,128,1,float16,float16,0,0.25278933842976886
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,2,2,128,1,float16,float16,0,0.16267200311024985
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.34759998321533203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,fp8,0,0.36266668637593585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,2,1,128,1,float16,float16,0,0.15433067083358765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.563264012336731
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,fp8,0,0.2348960041999817
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,2,2,128,1,float16,float16,0,0.09294933080673218
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,2,2,128,1,float16,float16,0,0.17638933658599854
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.334666649500529
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,2,1,128,1,float16,float16,0,0.08956266442934673
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.2396106719970703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,2,2,128,1,float16,float16,0,0.09317333499590556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.23487999041875204
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,fp8,0,0.22787733872731528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,2,2,128,1,float16,float16,0,0.5908639828364054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,1.1129706700642903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,2,2,128,1,float16,float16,0,0.6017066637674967
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,2,1,128,1,float16,float16,0,0.47126932938893634
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,fp8,0,0.5687573353449503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,2,1,128,1,float16,float16,0,1.004805326461792
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,2,1,128,1,float16,float16,0,0.26100265979766846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,fp8,0,1.1786879698435466
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,2,2,128,1,float16,float16,0,0.2890506585439046
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,1.0981547037760417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.5731039841969808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,fp8,0,0.38555200894673664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,2,2,128,1,float16,float16,0,0.1693120002746582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,fp8,0,0.28541866938273114
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,2,1,128,1,float16,float16,0,0.15657599767049155
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,2,2,128,1,float16,float16,0,0.27924267450968426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.3383839925130208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.5617119868596395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,2,2,128,1,float16,float16,0,0.16642666856447855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.3389493227005005
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,2,1,128,1,float16,float16,0,0.10160533587137859
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,2,2,128,1,float16,float16,0,0.10825600226720174
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.28472532828648883
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,fp8,0,0.17715734243392944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,2,2,128,1,float16,float16,0,0.10854933659235637
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,2,2,128,1,float16,float16,0,0.0694400022427241
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.28544000784556073
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.1767680048942566
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,2,1,128,1,float16,float16,0,0.0661653329928716
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,fp8,0,0.17399466037750244
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.17711466550827026
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,2,2,128,1,float16,float16,0,0.06833600004514058
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,2,1,128,1,float16,float16,0,0.5059253374735514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,fp8,0,0.5708906650543213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,2,2,128,1,float16,float16,0,0.614245335261027
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,2,2,128,1,float16,float16,0,0.6169013182322184
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,2,1,128,1,float16,float16,0,0.9991253217061361
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,1.0474239985148113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,2,2,128,1,float16,float16,0,0.2918293277422587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,fp8,0,1.1891252994537354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.5077866713205973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,1.0734612941741943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.49670934677124023
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.3121280074119568
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,2,2,128,1,float16,float16,0,0.28219733635584515
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,2,1,128,1,float16,float16,0,0.23820799589157104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,2,2,128,1,float16,float16,0,0.15424000223477682
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.3154720067977905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,2,1,128,1,float16,float16,0,0.1397760013739268
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,fp8,0,0.3078773419062297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,2,2,128,1,float16,float16,0,0.15482133626937866
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,fp8,0,0.1877546707789103
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,2,2,128,1,float16,float16,0,0.09924800197283427
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,2,1,128,1,float16,float16,0,0.0923413336277008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.15955199797948202
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,fp8,0,0.12633599837621054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.15958933035532633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,2,2,128,1,float16,float16,0,0.10021866361300151
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,2,2,128,1,float16,float16,0,0.05204799771308899
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,2,2,128,1,float16,float16,0,0.052000001072883606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,fp8,0,0.12012799580891927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.12654933333396912
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.12680533528327942
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,2,1,128,1,float16,float16,0,0.04865066707134247
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.12098133563995361
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,2,1,128,1,float16,float16,0,0.045754666129748024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,2,2,128,1,float16,float16,0,0.04740799963474274
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,fp8,0,0.11787733435630798
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,2,2,128,1,float16,float16,0,0.04726399978001913
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.1200320025285085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,fp8,0,0.31834133466084796
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,2,1,128,1,float16,float16,0,0.28275199731191
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,2,1,128,1,float16,float16,0,0.5998080174128214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,2,2,128,1,float16,float16,0,0.17977599302927652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,2,2,128,1,float16,float16,0,0.3905973434448242
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.3195573290189107
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,2,2,128,1,float16,float16,0,0.39555199940999347
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,fp8,0,0.7017813523610433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.6528693437576294
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.6557066837946574
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,2,1,128,1,float16,float16,0,0.15470932920773825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,2,2,128,1,float16,float16,0,0.176256000995636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,2,2,128,1,float16,float16,0,0.10566932956377666
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.31935999790827435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,fp8,0,0.2031466762224833
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.2086720069249471
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,2,2,128,1,float16,float16,0,0.060565332571665444
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,fp8,0,0.1495733360449473
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,2,2,128,1,float16,float16,0,0.10684800148010254
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,2,2,128,1,float16,float16,0,0.05987200140953064
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.1525226632754008
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,2,1,128,1,float16,float16,0,0.0525439977645874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,fp8,0,0.09602666894594829
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,2,1,128,1,float16,float16,0,0.0937653382619222
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.20981866121292114
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.1520799994468689
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,2,2,128,1,float16,float16,0,0.040565334260463715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.09594666957855225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,fp8,0,0.09211200475692749
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,2,2,128,1,float16,float16,0,0.040421334405740104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,2,1,128,1,float16,float16,0,0.03792533278465271
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.09621333082516988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.09346133470535278
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,2,1,128,1,float16,float16,0,0.03610666592915853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,2,2,128,1,float16,float16,0,0.03714666763941447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,fp8,0,0.09020266930262248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.09275733431180318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,2,2,128,1,float16,float16,0,0.036992001036802925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,2,1,128,1,float16,float16,0,0.30032533407211304
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,fp8,0,0.3630933364232381
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,2,2,128,1,float16,float16,0,0.4240640004475911
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,2,2,128,1,float16,float16,0,0.428272008895874
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.6655093431472778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,2,2,128,1,float16,float16,0,0.1964799960454305
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.30394667387008667
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,2,1,128,1,float16,float16,0,0.6375466585159302
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,fp8,0,0.7708799839019775
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.6600480079650879
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,2,2,128,1,float16,float16,0,0.19597333669662476
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,fp8,0,0.1832266648610433
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,2,1,128,1,float16,float16,0,0.14682132999102274
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.30077866713205975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,2,2,128,1,float16,float16,0,0.10306666294733684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.1914400060971578
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,fp8,0,0.11555733283360799
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,2,2,128,1,float16,float16,0,0.05825066566467285
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,2,1,128,1,float16,float16,0,0.08762666583061218
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.11734400192896526
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,2,2,128,1,float16,float16,0,0.10252267122268677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,2,1,128,1,float16,float16,0,0.04878933231035868
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,fp8,0,0.07153066496054332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,2,2,128,1,float16,float16,0,0.0576853354771932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.1907093326250712
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.11693867047627766
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,2,1,128,1,float16,float16,0,0.02924799919128418
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,fp8,0,0.06683200101057689
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,2,2,128,1,float16,float16,0,0.03302400062481562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,2,2,128,1,float16,float16,0,0.03324266771475474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.07142400244871776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.0713919997215271
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.06679466863473256
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,2,2,128,1,float16,float16,0,0.028853334486484528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,2,1,128,1,float16,float16,0,0.02717866748571396
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,fp8,0,0.0644160012404124
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,2,2,128,1,float16,float16,0,0.02871999889612198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.06673066814740498
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,2,2,128,1,float16,float16,0,0.026533332963784535
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,fp8,0,0.04155733436346054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.04331733286380768
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.04317333300908407
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,2,2,128,1,float16,float16,0,0.026485333840052288
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,2,1,128,1,float16,float16,0,0.025568000972270966
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,2,1,128,1,float16,float16,0,0.2196106712023417
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,fp8,0,0.24913599093755087
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,2,2,128,1,float16,float16,0,0.3666613499323527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.4634026686350505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,2,2,128,1,float16,float16,0,0.36667199929555255
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,2,2,128,1,float16,float16,0,0.1441386640071869
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.46725332736968994
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.19727466503779092
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,2,1,128,1,float16,float16,0,0.5614506800969442
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.19577600558598837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,fp8,0,0.5579359928766886
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,2,2,128,1,float16,float16,0,0.15403200189272562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,fp8,0,0.1223413348197937
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,2,1,128,1,float16,float16,0,0.09892266988754272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.12780800461769104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,2,2,128,1,float16,float16,0,0.07147733370463054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,2,2,128,1,float16,float16,0,0.04048533240954081
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,fp8,0,0.06460799773534139
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.12733333309491476
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.06832533578077953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,2,1,128,1,float16,float16,0,0.032842665910720825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,2,1,128,1,float16,float16,0,0.05452266832192739
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,2,2,128,1,float16,float16,0,0.0407679999868075
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,fp8,0,0.0444213350613912
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.06719466547171275
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,2,1,128,1,float16,float16,0,0.019904000063737232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,2,2,128,1,float16,float16,0,0.02380799998839696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.0444160004456838
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,2,2,128,1,float16,float16,0,0.07152533531188965
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,2,2,128,1,float16,float16,0,0.02380799998839696
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,fp8,0,0.0401706670721372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.044453332821528115
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.03980266551176707
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,2,1,128,1,float16,float16,0,0.017765333255132038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,fp8,0,0.03798400113979975
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.03982399900754293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,2,2,128,1,float16,float16,0,0.01952533299724261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,2,2,128,1,float16,float16,0,0.01960533360640208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,2,2,128,1,float16,float16,0,0.017269333203633625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.029834667841593426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,fp8,0,0.028586665789286297
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,2,1,128,1,float16,float16,0,0.01643199970324834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.02980799973011017
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,2,2,128,1,float16,float16,0,0.01611199975013733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,2,1,128,1,float16,float16,0,0.015941333025693893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,2,2,128,1,float16,float16,0,0.017338667064905167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.01989866668979327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,fp8,0,0.019472000499566395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,2,2,128,1,float16,float16,0,0.016154666741689045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.0198186660806338
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,2,1,128,1,float16,float16,0,0.0745119998852412
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,2,2,128,1,float16,float16,0,0.12666133046150208
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,fp8,0,0.08975467085838318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.14779733618100485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,2,2,128,1,float16,float16,0,0.11914666493733723
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,2,2,128,1,float16,float16,0,0.05787200232346853
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,2,1,128,1,float16,float16,0,0.18180267016092935
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.09278933207194011
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,fp8,0,0.19139732917149863
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,fp8,0,0.044495999813079834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.1492800017197927
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,2,2,128,1,float16,float16,0,0.05786666770776113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,2,1,128,1,float16,float16,0,0.04165333261092504
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,2,2,128,1,float16,float16,0,0.032986665765444435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.04446933170159658
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,2,1,128,1,float16,float16,0,0.02497600018978119
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.09213866790135701
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,2,2,128,1,float16,float16,0,0.0328053335348765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,2,2,128,1,float16,float16,0,0.019023999571800232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,fp8,0,0.03088533381621043
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.04454400142033895
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,2,1,128,1,float16,float16,0,0.015135999768972397
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.030938667555650074
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,fp8,0,0.02683199942111969
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,2,2,128,1,float16,float16,0,0.01889066646496455
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.0312266672650973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.02657066782315572
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,2,1,128,1,float16,float16,0,0.013114667187134424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,2,2,128,1,float16,float16,0,0.014538666854302088
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,2,2,128,1,float16,float16,0,0.014789332946141561
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,fp8,0,0.024538666009902954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.026528000831604004
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,2,1,128,1,float16,float16,0,0.011605333536863327
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.02317333221435547
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,2,2,128,1,float16,float16,0,0.012613333761692047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,2,2,128,1,float16,float16,0,0.012624000509579977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.024842667082945507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,fp8,0,0.021829334398110706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.01966399947802226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,2,2,128,1,float16,float16,0,0.011413333316644033
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,fp8,0,0.019904000063737232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,2,2,128,1,float16,float16,0,0.011551999797423681
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,2,1,128,1,float16,float16,0,0.01137599969903628
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.01972266659140587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,2,1,128,1,float16,float16,0,0.011173332730929056
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,2,2,128,1,float16,float16,0,0.011194666226704916
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,fp8,0,0.019050666441520054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,2,2,128,1,float16,float16,0,0.011120000233252844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.019167999426523846
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.019088000059127808
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,2,1,128,1,float16,float16,0,0.035429333647092186
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,fp8,0,0.03711999952793121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,2,2,128,1,float16,float16,0,0.05231999854246775
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.0674720009167989
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,2,2,128,1,float16,float16,0,0.05202666421731313
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.0669653316338857
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,2,2,128,1,float16,float16,0,0.02855466554562251
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,2,1,128,1,float16,float16,0,0.06282133360703786
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.03732266773780187
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,fp8,0,0.06508266429106395
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,fp8,0,0.024127999941507976
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,2,2,128,1,float16,float16,0,0.02882666637500127
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,2,1,128,1,float16,float16,0,0.02096533278624217
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,2,2,128,1,float16,float16,0,0.01632533346613248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.03741333385308584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.024495999018351238
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,fp8,0,0.02033599962790807
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,2,1,128,1,float16,float16,0,0.012549333274364471
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.019744000087181728
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,2,2,128,1,float16,float16,0,0.016704000532627106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,2,2,128,1,float16,float16,0,0.012282667060693106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.024351999163627625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,2,2,128,1,float16,float16,0,0.012416000167528788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,2,1,128,1,float16,float16,0,0.010698666175206503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,fp8,0,0.017898666361967724
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.019823999454577763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,2,1,128,1,float16,float16,0,0.009365333244204521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.017797333498795826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,fp8,0,0.016757333030303318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,2,2,128,1,float16,float16,0,0.010053333515922228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,2,2,128,1,float16,float16,0,0.010186666622757912
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,2,1,128,1,float16,float16,0,0.009018666421373686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.017893332988023758
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,fp8,0,0.016224000602960587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.016517333686351776
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.016623999923467636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,2,2,128,1,float16,float16,0,0.008997333546479544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,2,2,128,1,float16,float16,0,0.008645333349704742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,2,2,128,1,float16,float16,0,0.008938666433095932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.016197333733240765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,fp8,0,0.015840000162522
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,2,2,128,1,float16,float16,0,0.008559999987483025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,2,1,128,1,float16,float16,0,0.008650666723648706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.016016000260909397
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.01607999950647354
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,2,1,128,1,float16,float16,0,0.008453333129485449
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,fp8,0,0.015909332782030106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,2,2,128,1,float16,float16,0,0.008725333337982496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,2,2,128,1,float16,float16,0,0.008469333251317343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.01613333324591319
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,2,2,128,1,float16,float16,0,0.027429332335789997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,2,1,128,1,float16,float16,0,0.019333332777023315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.030949334303538006
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,2,2,128,1,float16,float16,0,0.027295999228954315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,fp8,0,0.021488000949223835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,2,1,128,1,float16,float16,0,0.03346133232116699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,2,2,128,1,float16,float16,0,0.01595199977358182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,fp8,0,0.03218133250872294
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.020831999679406483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,fp8,0,0.016879999389251072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,2,1,128,1,float16,float16,0,0.012298667182525
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.03107200066248576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,2,2,128,1,float16,float16,0,0.012063999970753988
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.016143999993801117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,2,1,128,1,float16,float16,0,0.010378666842977205
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.02053333322207133
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,fp8,0,0.01463466634353002
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,2,2,128,1,float16,float16,0,0.016063999384641647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,2,2,128,1,float16,float16,0,0.01201066623131434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.01611199975013733
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.014170666535695394
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,fp8,0,0.013424000392357508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,2,1,128,1,float16,float16,0,0.009136000027259191
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,2,2,128,1,float16,float16,0,0.010010666524370512
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,2,2,128,1,float16,float16,0,0.0100426667680343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,2,2,128,1,float16,float16,0,0.008912000184257826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.014074667046467463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,2,2,128,1,float16,float16,0,0.008949333180983862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,2,1,128,1,float16,float16,0,0.008767999708652496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,2,2,128,1,float16,float16,0,0.00860799973209699
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.012383999923865
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,fp8,0,0.012703999876976013
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,2,2,128,1,float16,float16,0,0.00847999999920527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,2,2,128,1,float16,float16,0,0.0084906667470932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.012453333785136541
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,2,1,128,1,float16,float16,0,0.008447999755541483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.01231466606259346
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,2,1,128,1,float16,float16,0,0.008496000121037165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,fp8,0,0.012495999534924826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,2,2,128,1,float16,float16,0,0.008538666491707167
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.012266666938861212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.012080000092585882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,2,1,128,1,float16,float16,0,0.008234666660428047
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,fp8,0,0.012245333443085352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,2,2,128,1,float16,float16,0,0.008309333274761835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.012234666695197424
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,2,2,128,1,float16,float16,0,0.008383999889095625
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,2,2,128,1,float16,float16,0,0.019445333629846573
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,2,1,128,1,float16,float16,0,0.01551466683546702
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.025759999950726826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,fp8,0,0.020981334149837494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,2,2,128,1,float16,float16,0,0.01937066639463107
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,2,2,128,1,float16,float16,0,0.011829332758982977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,2,1,128,1,float16,float16,0,0.02609066665172577
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,2,1,128,1,float16,float16,0,0.010133333504199982
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,fp8,0,0.026602665583292644
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.020096000283956528
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,fp8,0,0.018661333868900936
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.02590399980545044
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.018160000443458557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,fp8,0,0.016917333006858826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,2,2,128,1,float16,float16,0,0.011802667131026586
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,2,2,128,1,float16,float16,0,0.009765333185593287
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,2,2,128,1,float16,float16,0,0.009717333440979322
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.020288000504175823
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,2,1,128,1,float16,float16,0,0.009039999917149544
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,2,2,128,1,float16,float16,0,0.008682666967312494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.016501333564519882
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.01820266619324684
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,2,2,128,1,float16,float16,0,0.008650666723648706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,2,1,128,1,float16,float16,0,0.008496000121037165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,fp8,0,0.017594666530688603
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.016693333784739178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,2,2,128,1,float16,float16,0,0.008394666636983553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,2,2,128,1,float16,float16,0,0.008400000010927519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.016794666647911072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,2,1,128,1,float16,float16,0,0.008218666538596153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,2,1,128,1,float16,float16,0,0.008309333274761835
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,2,2,128,1,float16,float16,0,0.008240000034372011
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,fp8,0,0.016837333639462788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,2,2,128,1,float16,float16,0,0.008037333066264788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.01684800038735072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,2,1,128,1,float16,float16,0,0.008031999692320824
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.01672533278663953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,2,2,128,1,float16,float16,0,0.008080000057816505
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,2,2,128,1,float16,float16,0,0.007967999825874964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,fp8,0,0.016682667036851246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.016773333152135212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,2,2,128,1,float16,float16,0,0.007877333089709282
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.016864000509182613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,2,1,128,1,float16,float16,0,0.007840000092983246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,fp8,0,0.016597333053747814
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.016938666502634685
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,2,2,128,1,float16,float16,0,0.007967999825874964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.021322667598724365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,2,2,128,1,float16,float16,0,0.01543466622630755
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,fp8,0,0.018624000251293182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,2,1,128,1,float16,float16,0,0.013781332721312841
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,2,2,128,1,float16,float16,0,0.015397333850463232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,2,1,128,1,float16,float16,0,0.022645334402720135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,fp8,0,0.022015998760859173
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,2,2,128,1,float16,float16,0,0.00972800018886725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,fp8,0,0.016858667135238647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.01836799954374631
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,2,1,128,1,float16,float16,0,0.00903466654320558
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.021375998854637146
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,2,2,128,1,float16,float16,0,0.00855466661353906
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.016442666451136272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.01821333294113477
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,fp8,0,0.016575999557971954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,2,1,128,1,float16,float16,0,0.008496000121037165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,2,2,128,1,float16,float16,0,0.009648000200589498
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,2,2,128,1,float16,float16,0,0.008623999853928884
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.016682667036851246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.016154666741689045
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,2,2,128,1,float16,float16,0,0.008207999790708223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,2,2,128,1,float16,float16,0,0.008272000278035799
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,2,1,128,1,float16,float16,0,0.008320000022649765
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.01632000009218852
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,2,1,128,1,float16,float16,0,0.008101333553592363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.01684800038735072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,2,2,128,1,float16,float16,0,0.008127999802430471
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,fp8,0,0.016805333395799
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,2,2,128,1,float16,float16,0,0.007920000081261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,2,2,128,1,float16,float16,0,0.008021333565314611
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.016704000532627106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,2,1,128,1,float16,float16,0,0.007776000226537387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,fp8,0,0.01684800038735072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.016778666526079178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,2,2,128,1,float16,float16,0,0.007903999959429106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,2,2,128,1,float16,float16,0,0.007861333588759104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.016800000021855038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,2,1,128,1,float16,float16,0,0.007733333234985669
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,fp8,0,0.01639466608564059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,2,2,128,1,float16,float16,0,0.007893333211541176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.016832000265518825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.016672000288963318
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,2,2,128,1,float16,float16,0,0.007776000226537387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,2,1,128,1,float16,float16,0,0.007840000092983246
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,fp8,0,0.01629866659641266
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.01685333376129468
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,2,2,128,1,float16,float16,0,0.00786666696270307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,2,2,128,1,float16,float16,0,0.011706666400035223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.013573333621025085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,0,0.011653333902359009
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,2,1,128,1,float16,float16,0,0.009093333035707474
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,2,2,128,1,float16,float16,0,0.011616000284751257
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,2,1,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,0,0.013829333086808523
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,2,1,128,1,float16,float16,0,0.008832000195980072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.011440000186363855
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,2,2,128,1,float16,float16,0,0.008047999814152718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,0,0.011322667201360067
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.013610667238632837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,2,2,128,1,float16,float16,0,0.008176000167926153
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,2,2,128,1,float16,float16,0,0.007770666852593422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,0,0.011274666835864386
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.011557333171367645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,2,1,128,1,float16,float16,0,0.00874133345981439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,2,2,128,1,float16,float16,0,0.00766933336853981
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,2,2,128,1,float16,float16,0,0.007946666950980822
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,2,1,128,1,float16,float16,0,0.00878399983048439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,2,2,128,1,float16,float16,0,0.007680000116427739
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,0,0.011312000453472137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.011258666714032492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,2,2,128,1,float16,float16,0,0.007818666597207388
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,2,1,128,1,float16,float16,0,0.008799999952316284
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,2,2,128,1,float16,float16,0,0.00766933336853981
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,2,1,128,1,float16,float16,0,0.008597333605090777
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,2,2,128,1,float16,float16,0,0.007631999750932057
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.010885333021481832
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,2,2,128,1,float16,float16,0,0.007642666498819987
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,2,2,128,1,float16,float16,0,0.007663999994595845
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,2,1,128,1,float16,float16,0,0.008586666857202848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,0,0.010773333410422007
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,2,2,128,1,float16,float16,0,0.00922133338948091
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,2,1,128,1,float16,float16,0,0.008512000242869059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,2,2,128,1,float16,float16,0,0.0075626665105422335
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,0,0.010863999525705973
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.010874666273593903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,2,2,128,1,float16,float16,0,0.010224000240365664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,1,1,128,1,float16,float16,0,0.8249226411183676
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,1,1,128,1,float16,float16,0,1.5248160362243652
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,1,1,128,1,float16,float16,0,0.808293342590332
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,1,1,128,1,float16,float16,0,1.4676426251729329
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,1.796298662821452
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,1,1,128,1,float16,float16,0,0.5109653472900391
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,1,1,128,1,float16,float16,0,0.4934026797612508
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.9135306676228842
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,3.2210667928059897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,3.2594614028930664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,1.8126719792683919
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.9196639855702718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,1,1,128,1,float16,float16,0,0.8604586919148763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,1,1,128,1,float16,float16,0,0.5391519864400228
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,1,1,128,1,float16,float16,0,0.8865013122558594
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,1,1,128,1,float16,float16,0,0.526581327120463
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,1.1516053676605225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,1.9341120719909668
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,1.1535893281300862
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,1,1,128,1,float16,float16,0,0.3481546640396118
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,1.9649866422017415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,1,1,128,1,float16,float16,0,0.32947200536727905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.6799200375874838
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.6895199616750082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,1,1,128,1,float16,float16,0,0.3895999987920125
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,1,1,128,1,float16,float16,0,0.643338680267334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,1,1,128,1,float16,float16,0,0.6436800161997477
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,1,1,128,1,float16,float16,0,0.388922651608785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.7926506996154785
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,1.4816959698994954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,1,1,128,1,float16,float16,0,0.24885332584381104
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,1.4918773969014485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,1,1,128,1,float16,float16,0,0.24785067637761435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.6019839843114217
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.7921706835428873
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.5673813422520956
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,1,1,128,1,float16,float16,0,0.8072746594746908
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,1,1,128,1,float16,float16,0,0.8204320271809896
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,1,1,128,1,float16,float16,0,0.45051201184590656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.9906506538391113
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,1,1,128,1,float16,float16,0,0.4517600138982137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,1.7079839706420898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,1,1,128,1,float16,float16,0,0.2802026669184367
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,1.7081972757975261
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.687397321065267
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,1,1,128,1,float16,float16,0,0.2824053366978963
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,1.0020906925201416
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,1,1,128,1,float16,float16,0,0.18604799111684164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,1,1,128,1,float16,float16,0,0.18633600076039633
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.6809013684590658
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.45265599091847736
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.4535733461380005
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,1,1,128,1,float16,float16,0,0.46885331471761066
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,1,1,128,1,float16,float16,0,0.4716693162918091
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,1,1,128,1,float16,float16,0,0.28471465905507404
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,1.0447626908620198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.6254080136617025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,1.049557367960612
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,1,1,128,1,float16,float16,0,0.28488532702128094
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,1,1,128,1,float16,float16,0,0.1845653255780538
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.6357813278834025
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.36902932325998944
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,1,1,128,1,float16,float16,0,0.12956266601880392
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,1,1,128,1,float16,float16,0,0.18131732940673828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.3703146775563558
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,1,1,128,1,float16,float16,0,0.12492266297340393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.34247998396555585
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.33630398909250897
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,1,1,128,1,float16,float16,0,0.2539733250935872
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.5685919920603434
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,1,1,128,1,float16,float16,0,0.2611413399378459
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.558730681737264
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,1,1,128,1,float16,float16,0,0.1593546668688456
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,1,1,128,1,float16,float16,0,0.46030934651692706
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.8927093346913656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,1,1,128,1,float16,float16,0,0.44997866948445636
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,1,1,128,1,float16,float16,0,0.1600213348865509
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.33318932851155597
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.9022986888885498
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,1,1,128,1,float16,float16,0,0.09155733386675517
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.3309226632118225
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,1,1,128,1,float16,float16,0,0.08476799726486206
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.22821333010991415
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.2280906637509664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.23415466149648032
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.23382933934529623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,1,1,128,1,float16,float16,0,0.09171199798583984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,1,1,128,1,float16,float16,0,0.08501332998275757
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,1,1,128,1,float16,float16,0,0.2741439938545227
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,1,1,128,1,float16,float16,0,0.1623306671778361
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.5708320140838623
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,1,1,128,1,float16,float16,0,0.16826132933298746
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,1,1,128,1,float16,float16,0,0.2744106650352478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.33342933654785156
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.3350133498509725
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.5622826814651489
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,1,1,128,1,float16,float16,0,0.10609066486358643
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,1,1,128,1,float16,float16,0,0.1058240036169688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.2834080060323079
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.1765013337135315
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.285861333211263
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,1,1,128,1,float16,float16,0,0.06550933420658112
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,1,1,128,1,float16,float16,0,0.06843733290831248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.17588800191879272
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,1,1,128,1,float16,float16,0,0.06560533245404561
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.17300266027450562
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,1,1,128,1,float16,float16,0,0.06805866460005443
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.17346133788426718
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,1,1,128,1,float16,float16,0,0.1497760017712911
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,1,1,128,1,float16,float16,0,0.264138658841451
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,1,1,128,1,float16,float16,0,0.15265599886576334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.3097440004348755
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,1,1,128,1,float16,float16,0,0.25942399104436237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.5009546677271525
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.31148799260457355
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.4991946617762248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,1,1,128,1,float16,float16,0,0.09618133306503296
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.15959466497103372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,1,1,128,1,float16,float16,0,0.09696533282597859
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.15704533457756042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.12591466307640076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.1253439982732137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,1,1,128,1,float16,float16,0,0.05111999809741974
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.12063466509183247
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,1,1,128,1,float16,float16,0,0.051039998730023704
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,1,1,128,1,float16,float16,0,0.04738133152325948
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.12060266733169556
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,1,1,128,1,float16,float16,0,0.04695466657479604
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.11845333377520244
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,1,1,128,1,float16,float16,0,0.0452106644709905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.11857600013415019
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,1,1,128,1,float16,float16,0,0.04532266656557719
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,1,1,128,1,float16,float16,0,0.16773333152135214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,1,1,128,1,float16,float16,0,0.10019200046857198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.3141760031382243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,1,1,128,1,float16,float16,0,0.10069333513577779
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.20123199621836343
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,1,1,128,1,float16,float16,0,0.1660053332646688
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.3202773332595825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,1,1,128,1,float16,float16,0,0.05811200042565664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,1,1,128,1,float16,float16,0,0.05806933343410492
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.205402672290802
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.1511733333269755
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.1511840025583903
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,1,1,128,1,float16,float16,0,0.039701332648595176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,1,1,128,1,float16,float16,0,0.039701332648595176
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.0958079993724823
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.09576533238093059
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.09301333626111348
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,1,1,128,1,float16,float16,0,0.03541333228349686
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.0925386647383372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,1,1,128,1,float16,float16,0,0.03741333385308584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,1,1,128,1,float16,float16,0,0.03758399933576584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.09072533249855042
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.09120532870292664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,1,1,128,1,float16,float16,0,0.035743998984495796
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,1,1,128,1,float16,float16,0,0.09641066193580627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.18607467412948608
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,1,1,128,1,float16,float16,0,0.09815466403961182
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,1,1,128,1,float16,float16,0,0.1788853406906128
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,1,1,128,1,float16,float16,0,0.17190933227539062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.2966666618982951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.29632532596588135
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,1,1,128,1,float16,float16,0,0.056128000219662987
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.11463466286659241
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.18741333484649658
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,1,1,128,1,float16,float16,0,0.05598933498064677
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.11594133575757344
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,1,1,128,1,float16,float16,0,0.032127998769283295
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.07074133555094402
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,1,1,128,1,float16,float16,0,0.028570666909217834
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,1,1,128,1,float16,float16,0,0.028581333657105763
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.06684799989064534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.07043733199437459
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.06674133241176605
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,1,1,128,1,float16,float16,0,0.026629333694775898
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,1,1,128,1,float16,float16,0,0.032085334261258446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.06504533191521962
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,1,1,128,1,float16,float16,0,0.02658133457104365
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.06467199822266896
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.03997333347797394
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.040234667559464775
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,1,1,128,1,float16,float16,0,0.02569599946339925
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,1,1,128,1,float16,float16,0,0.025653332471847534
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,1,1,128,1,float16,float16,0,0.06622933348019917
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,1,1,128,1,float16,float16,0,0.06588799754778545
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.12492266297340393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.1239946683247884
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,1,1,128,1,float16,float16,0,0.12770133217175803
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.19401599963506064
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,1,1,128,1,float16,float16,0,0.13102933764457703
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,1,1,128,1,float16,float16,0,0.03808533400297165
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.1948053240776062
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.06565866867701213
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.0662613312403361
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,1,1,128,1,float16,float16,0,0.03812266637881597
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.04339733223120371
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,1,1,128,1,float16,float16,0,0.019029332945744198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,1,1,128,1,float16,float16,0,0.022474666436513264
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.03984000037113825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,1,1,128,1,float16,float16,0,0.022661333282788593
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.04353066782156626
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,1,1,128,1,float16,float16,0,0.01926400015751521
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.039861333866914116
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,1,1,128,1,float16,float16,0,0.01740266631046931
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.03782933453718821
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,1,1,128,1,float16,float16,0,0.01632000009218852
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.027237333357334137
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.03799466788768768
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.027093333502610523
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,1,1,128,1,float16,float16,0,0.01746133342385292
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,1,1,128,1,float16,float16,0,0.016309333344300587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.018874666343132656
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,1,1,128,1,float16,float16,0,0.016058667252461117
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,1,1,128,1,float16,float16,0,0.016000000139077503
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.019189332922299702
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,1,1,128,1,float16,float16,0,0.03019733230272929
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.04223999877770742
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,1,1,128,1,float16,float16,0,0.03038399914900462
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,1,1,128,1,float16,float16,0,0.05248000224431356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.08913066983222961
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,1,1,128,1,float16,float16,0,0.05229333539803823
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.08878399928410848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.03031466652949651
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.04234133164087931
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,1,1,128,1,float16,float16,0,0.01777600000301997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,1,1,128,1,float16,float16,0,0.014416000495354334
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,1,1,128,1,float16,float16,0,0.01773333301146825
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.03010133405526479
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.02681066592534383
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,1,1,128,1,float16,float16,0,0.01461333284775416
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,1,1,128,1,float16,float16,0,0.012655999511480331
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.02665599932273229
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.02458133300145467
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,1,1,128,1,float16,float16,0,0.012549333274364471
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.02465066562096278
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,1,1,128,1,float16,float16,0,0.011642667154471079
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.02013333390156428
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,1,1,128,1,float16,float16,0,0.011717333147923151
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.02037866661945979
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,1,1,128,1,float16,float16,0,0.011317333827416102
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,1,1,128,1,float16,float16,0,0.011285333583752314
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,1,1,128,1,float16,float16,0,0.01121066634853681
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.018415999909241993
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.018464000274737675
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,1,1,128,1,float16,float16,0,0.01121066634853681
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,1,1,128,1,float16,float16,0,0.015487999965747198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,1,1,128,1,float16,float16,0,0.015376000354687372
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.023333333432674408
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,1,1,128,1,float16,float16,0,0.026133333643277485
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.03533333291610082
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.03540800015131632
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,1,1,128,1,float16,float16,0,0.026181332767009735
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,1,1,128,1,float16,float16,0,0.0120319997270902
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.019909333437681198
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,1,1,128,1,float16,float16,0,0.011909333368142446
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.023445333043734234
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,1,1,128,1,float16,float16,0,0.010293333480755487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,1,1,128,1,float16,float16,0,0.01027199998497963
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.019914666811625164
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.01814933369557063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.017925333231687546
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,1,1,128,1,float16,float16,0,0.009189333145817121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.016730666160583496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,1,1,128,1,float16,float16,0,0.009317333499590555
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.01635733370979627
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.016271999726692837
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,1,1,128,1,float16,float16,0,0.008965333302815756
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.01605333387851715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,1,1,128,1,float16,float16,0,0.008912000184257826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,1,1,128,1,float16,float16,0,0.008639999975760778
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.015957333147525787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,1,1,128,1,float16,float16,0,0.0086666668454806
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,1,1,128,1,float16,float16,0,0.008485333373149237
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,1,1,128,1,float16,float16,0,0.00843733362853527
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.01575999955336253
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.01590399940808614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,1,1,128,1,float16,float16,0,0.01184533288081487
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.015957333147525787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.016069332758585613
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,1,1,128,1,float16,float16,0,0.01166933278242747
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,1,1,128,1,float16,float16,0,0.01492799942692121
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,1,1,128,1,float16,float16,0,0.014959999670584997
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.019738666713237762
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,1,1,128,1,float16,float16,0,0.010224000240365664
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.019765333582957584
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.014218666901191076
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,1,1,128,1,float16,float16,0,0.009194666519761086
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.014192000031471252
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,1,1,128,1,float16,float16,0,0.010138666878143946
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,1,1,128,1,float16,float16,0,0.009152000149091085
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.013359999905029932
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.012538666526476542
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,1,1,128,1,float16,float16,0,0.00879466657837232
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.012495999534924826
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,1,1,128,1,float16,float16,0,0.008592000231146812
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,1,1,128,1,float16,float16,0,0.008832000195980072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.012346666306257248
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,1,1,128,1,float16,float16,0,0.008986666798591614
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.01221866657336553
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.01209066684047381
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,1,1,128,1,float16,float16,0,0.008405333384871483
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.012223999947309494
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,1,1,128,1,float16,float16,0,0.008586666857202848
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.011962667107582092
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.012026666353146235
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,1,1,128,1,float16,float16,0,0.008165333420038223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,1,1,128,1,float16,float16,0,0.008207999790708223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,1,1,128,1,float16,float16,0,0.00978133330742518
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,1,1,128,1,float16,float16,0,0.009749333063761393
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.01823466643691063
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,1,1,128,1,float16,float16,0,0.011509332805871964
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,1,1,128,1,float16,float16,0,0.01156266654531161
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.020400000115235645
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.02027733375628789
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,1,1,128,1,float16,float16,0,0.00874133345981439
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.016885332763195038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,1,1,128,1,float16,float16,0,0.008858666444818178
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.016885332763195038
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.018160000443458557
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,1,1,128,1,float16,float16,0,0.008517333616813024
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,1,1,128,1,float16,float16,0,0.008458666503429413
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,1,1,128,1,float16,float16,0,0.008400000010927519
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.016965333372354507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,1,1,128,1,float16,float16,0,0.008133333176374435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.01695466662446658
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.01687466725707054
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,1,1,128,1,float16,float16,0,0.008256000156203905
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,1,1,128,1,float16,float16,0,0.008117333054542542
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.01682666689157486
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.016751999656359356
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,1,1,128,1,float16,float16,0,0.008133333176374435
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.016714667280515034
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,1,1,128,1,float16,float16,0,0.007797333101431529
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,1,1,128,1,float16,float16,0,0.00797333319981893
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.015967999895413715
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,1,1,128,1,float16,float16,0,0.007930666829148928
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.016490666816631954
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,1,1,128,1,float16,float16,0,0.008725333337982496
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.016789333273967106
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,1,1,128,1,float16,float16,0,0.008693333094318708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,1,1,128,1,float16,float16,0,0.009957333405812582
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.018405333161354065
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,1,1,128,1,float16,float16,0,0.009877333417534828
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.018474667022625606
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,1,1,128,1,float16,float16,0,0.008474666625261307
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.016415999581416447
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,1,1,128,1,float16,float16,0,0.008101333553592363
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.016634666671355564
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,1,1,128,1,float16,float16,0,0.008421333506703377
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.01624533285697301
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,1,1,128,1,float16,float16,0,0.008272000278035799
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,1,1,128,1,float16,float16,0,0.008042666440208754
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,1,1,128,1,float16,float16,0,0.00816000004609426
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.016773333152135212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.01693333312869072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.016965333372354507
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,1,1,128,1,float16,float16,0,0.007749333356817563
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,1,1,128,1,float16,float16,0,0.00784533346692721
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.016794666647911072
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.016309333344300587
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.016869333883126576
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,1,1,128,1,float16,float16,0,0.007760000104705493
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.01672533278663953
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,1,1,128,1,float16,float16,0,0.007813333223263422
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,1,1,128,1,float16,float16,0,0.007776000226537387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.016410666207472484
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.016613333175579708
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,1,1,128,1,float16,float16,0,0.008037333066264788
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.01139733319481214
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,1,1,128,1,float16,float16,0,0.00785600021481514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,1,1,128,1,float16,float16,0,0.007887999837597212
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,1,1,128,1,float16,float16,0,0.008058666562040647
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.011424000064531961
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,1,1,128,1,float16,float16,0,0.008112000301480293
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.011535999675591787
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,1,1,128,1,float16,float16,0,0.007936000203092894
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,1,1,128,1,float16,float16,0,0.007642666498819987
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,1,1,128,1,float16,float16,0,0.007605333502093951
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.011365332951148352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,1,1,128,1,float16,float16,0,0.007823999971151352
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,1,1,128,1,float16,float16,0,0.007776000226537387
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,1,1,128,1,float16,float16,0,0.007663999994595845
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,1,1,128,1,float16,float16,0,0.00761600024998188
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,1,1,128,1,float16,float16,0,0.007663999994595845
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.010858666151762009
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,1,1,128,1,float16,float16,0,0.00744000015159448
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.010826667149861654
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,1,1,128,1,float16,float16,0,0.00749333327015241
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,1,1,128,1,float16,float16,0,0.007578666632374127
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.01071999967098236
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,1,1,128,1,float16,float16,0,0.007530666887760162
VLLM,0.14.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.01080000028014183
