framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,fp8,0,7.659050623575847
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,fp8,0,6.872597376505534
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,fp8,0,10.328282674153646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,fp8,0,19.098037719726562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,fp8,0,3.6974881490071616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,64,8,128,1,float16,float16,0,213.14617919921875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,64,2,128,1,float16,float16,0,212.13348388671875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,64,1,128,1,float16,float16,0,215.79850260416666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,64,4,128,1,float16,float16,0,217.2341105143229
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,64,1,128,1,float16,float16,0,108.13848876953125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,64,64,128,1,float16,float16,0,111.40468343098958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,fp8,0,9.727994918823242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,fp8,0,4.112618764241536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,64,2,128,1,float16,float16,0,107.09665934244792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,fp8,0,5.245125452677409
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,fp8,0,1.8548587163289387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,fp8,0,2.0289600690205893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,64,64,128,1,float16,float16,0,54.9148915608724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,fp8,0,2.7096001307169595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,64,1,128,1,float16,float16,0,50.363505045572914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,64,2,128,1,float16,float16,0,51.16162618001302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,fp8,0,4.984277407328288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,64,4,128,1,float16,float16,0,51.32373046875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,64,4,128,1,float16,float16,0,105.4950663248698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,64,8,128,1,float16,float16,0,108.20888264973958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,64,64,128,1,float16,float16,0,28.540318806966145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,64,1,128,1,float16,float16,0,27.11169687906901
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,fp8,0,1.0300640265146892
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,fp8,0,1.3969279925028484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,fp8,0,0.9067520300547282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,64,8,128,1,float16,float16,0,52.22644551595052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,fp8,0,223.6243896484375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,fp8,0,2.509514649709066
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,64,2,128,1,float16,float16,0,27.135770161946613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,fp8,0,5.456160227457683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,64,4,128,1,float16,float16,0,26.442832946777344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,64,8,128,1,float16,float16,0,27.165082295735676
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,fp8,0,116.70878092447917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,fp8,0,56.77745564778646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,fp8,0,7.220986684163411
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,fp8,0,12.429157257080078
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,fp8,0,5.655808130900065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,fp8,0,2.744703928629557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,64,64,128,1,float16,float16,0,62.70188903808594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,fp8,0,3.00980281829834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,64,1,128,1,float16,float16,0,61.085479736328125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,fp8,0,3.618879954020182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,64,2,128,1,float16,float16,0,59.07426452636719
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,fp8,0,6.333536148071289
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,64,1,128,1,float16,float16,0,120.34206136067708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,64,2,128,1,float16,float16,0,124.75340779622395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,64,4,128,1,float16,float16,0,122.96267700195312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,64,8,128,1,float16,float16,0,123.2411600748698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,fp8,0,1.3879733085632324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,fp8,0,1.4940212567647297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,64,4,128,1,float16,float16,0,59.833343505859375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,64,64,128,1,float16,float16,0,33.08526865641276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,fp8,0,1.8338720003763835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,fp8,0,3.149237314860026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,64,1,128,1,float16,float16,0,29.70415496826172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,fp8,0,135.14971923828125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,64,8,128,1,float16,float16,0,61.36854553222656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,fp8,0,0.6885386308034261
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,64,2,128,1,float16,float16,0,31.753072102864582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,64,4,128,1,float16,float16,0,29.13458760579427
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,fp8,0,0.7265333334604899
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,64,64,128,1,float16,float16,0,16.844868977864582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,fp8,0,0.9216053485870361
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,64,1,128,1,float16,float16,0,15.434917449951172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,64,2,128,1,float16,float16,0,15.045242309570312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,fp8,0,1.550165335337321
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,64,8,128,1,float16,float16,0,29.859578450520832
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,fp8,0,4.287760098775228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,64,4,128,1,float16,float16,0,14.524784088134766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,fp8,0,4.649392127990723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,64,8,128,1,float16,float16,0,14.385796864827475
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,fp8,0,65.20630900065105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,fp8,0,35.15977478027344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,fp8,0,5.878304163614909
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,fp8,0,9.384831746419271
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,fp8,0,2.1029280026753745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,64,1,128,1,float16,float16,0,42.9033457438151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,fp8,0,2.3446399370829263
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,64,64,128,1,float16,float16,0,45.29120890299479
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,64,2,128,1,float16,float16,0,42.37645975748698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,fp8,0,3.064703941345215
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,fp8,0,5.075887997945149
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,64,2,128,1,float16,float16,0,86.30029296875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,64,1,128,1,float16,float16,0,86.65355428059895
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,64,4,128,1,float16,float16,0,88.75808715820312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,64,8,128,1,float16,float16,0,87.41038004557292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,64,4,128,1,float16,float16,0,42.35529073079427
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,64,8,128,1,float16,float16,0,43.66414896647135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,fp8,0,1.1574026743570964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,fp8,0,1.1489226818084717
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,fp8,0,1.4572639465332031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,fp8,0,94.36585489908855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,fp8,0,2.5815787315368652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,64,2,128,1,float16,float16,0,20.783743540445965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,64,1,128,1,float16,float16,0,21.019488016764324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,64,64,128,1,float16,float16,0,10.859130859375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,64,4,128,1,float16,float16,0,20.636613210042317
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,64,64,128,1,float16,float16,0,22.30889638264974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,fp8,0,0.5667466719945272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,64,8,128,1,float16,float16,0,21.242416381835938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,fp8,0,0.6167893409729004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,fp8,0,0.7716639836629232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,64,1,128,1,float16,float16,0,10.291722615559896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,fp8,0,1.207856019337972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,64,2,128,1,float16,float16,0,9.365962982177734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,fp8,0,23.78943379720052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,64,4,128,1,float16,float16,0,9.965034484863281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,64,8,128,1,float16,float16,0,10.539583841959635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,fp8,0,6.691941579182942
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,fp8,0,7.27789306640625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,fp8,0,9.324613571166992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,fp8,0,46.01726277669271
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,fp8,0,13.675008138020834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,fp8,0,3.593519846598307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,64,64,128,1,float16,float16,0,59.57354227701823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,64,1,128,1,float16,float16,0,57.0965576171875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,fp8,0,3.921536127726237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,fp8,0,4.72164789835612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,64,2,128,1,float16,float16,0,53.942179361979164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,fp8,0,6.952431996663411
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,64,1,128,1,float16,float16,0,116.6038106282552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,64,2,128,1,float16,float16,0,113.36915079752605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,64,4,128,1,float16,float16,0,115.83433024088542
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,fp8,0,1.8131839434305828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,64,8,128,1,float16,float16,0,118.7885030110677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,fp8,0,1.963754653930664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,64,4,128,1,float16,float16,0,54.7996571858724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,64,64,128,1,float16,float16,0,30.750689188639324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,fp8,0,2.401301383972168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,fp8,0,3.6739253997802734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,64,8,128,1,float16,float16,0,53.689290364583336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,fp8,0,120.27903238932292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,64,1,128,1,float16,float16,0,27.307210286458332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,fp8,0,0.8570613066355387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,64,2,128,1,float16,float16,0,26.907435099283855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,fp8,0,0.9803413550059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,64,4,128,1,float16,float16,0,27.591990152994793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,64,64,128,1,float16,float16,0,15.132532755533854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,64,1,128,1,float16,float16,0,12.7280642191569
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,fp8,0,1.1338613033294678
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,64,2,128,1,float16,float16,0,12.67971165974935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,fp8,0,1.8702826499938965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,64,8,128,1,float16,float16,0,28.64039357503255
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,fp8,0,0.4575306574503581
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,64,64,128,1,float16,float16,0,6.086938858032227
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,64,4,128,1,float16,float16,0,13.519920349121094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,fp8,0,0.45793068408966064
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,64,1,128,1,float16,float16,0,5.758415857950847
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,fp8,0,0.5983413457870483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,64,8,128,1,float16,float16,0,13.30185063680013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,fp8,0,0.9039999643961588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,64,2,128,1,float16,float16,0,5.332309405008952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,64,4,128,1,float16,float16,0,5.998640060424805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,64,8,128,1,float16,float16,0,6.414960225423177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,fp8,0,5.010549227396647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,fp8,0,32.7541758219401
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,fp8,0,15.383162180582682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,fp8,0,62.1111806233724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,fp8,0,5.404042561848958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,fp8,0,6.404533386230469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,fp8,0,9.350154876708984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,fp8,0,2.530394713083903
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,64,64,128,1,float16,float16,0,36.06384023030599
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,64,1,128,1,float16,float16,0,31.654693603515625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,fp8,0,2.906991958618164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,64,2,128,1,float16,float16,0,30.598470052083332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,fp8,0,3.426464080810547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,fp8,0,5.085066795349121
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,64,1,128,1,float16,float16,0,66.33296203613281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,64,2,128,1,float16,float16,0,65.5718485514323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,64,4,128,1,float16,float16,0,65.37162780761719
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,64,8,128,1,float16,float16,0,64.46183776855469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,64,4,128,1,float16,float16,0,31.733477274576824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,fp8,0,69.6070556640625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,64,8,128,1,float16,float16,0,31.2052485148112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,fp8,0,1.6107199986775715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,fp8,0,1.3550292650858562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,fp8,0,1.3508480389912922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,fp8,0,2.451360066731771
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,64,2,128,1,float16,float16,0,14.010725657145182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,64,64,128,1,float16,float16,0,8.114789326985678
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,64,4,128,1,float16,float16,0,15.990069071451822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,64,1,128,1,float16,float16,0,15.991546630859375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,fp8,0,0.6858826478322347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,fp8,0,0.6911146640777588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,64,8,128,1,float16,float16,0,16.011348724365234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,fp8,0,0.8069813251495361
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,64,64,128,1,float16,float16,0,17.009653727213543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,fp8,0,1.1770293712615967
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,64,1,128,1,float16,float16,0,7.014682769775391
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,64,2,128,1,float16,float16,0,7.2342878977457685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,64,4,128,1,float16,float16,0,6.654250462849935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,64,64,128,1,float16,float16,0,3.771333376566569
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,fp8,0,0.315338671207428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,64,1,128,1,float16,float16,0,3.571791966756185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,fp8,0,0.3577706813812256
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,64,8,128,1,float16,float16,0,7.11524772644043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,fp8,0,0.4370773235956828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,fp8,0,17.57002131144206
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,64,2,128,1,float16,float16,0,3.3972800572713218
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,64,4,128,1,float16,float16,0,3.2461493810017905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,fp8,0,0.5988906621932983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,64,8,128,1,float16,float16,0,3.4168694814046225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,fp8,0,7.735872268676758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,fp8,0,34.92933909098307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,fp8,0,7.126325607299805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,fp8,0,7.174485524495442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,fp8,0,8.606762568155924
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,fp8,0,11.281349182128906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,fp8,0,3.3321332931518555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,64,1,128,1,float16,float16,0,28.906880696614582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,64,64,128,1,float16,float16,0,33.49508921305338
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,fp8,0,3.5926987330118814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,fp8,0,4.092506726582845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,64,2,128,1,float16,float16,0,29.20347849527995
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,64,4,128,1,float16,float16,0,61.658955891927086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,64,1,128,1,float16,float16,0,64.25496419270833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,64,2,128,1,float16,float16,0,61.27087910970052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,fp8,0,5.6258290608723955
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,fp8,0,1.6688373883565266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,64,8,128,1,float16,float16,0,65.28769938151042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,fp8,0,1.792634646097819
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,64,1,128,1,float16,float16,0,15.931451161702475
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,64,4,128,1,float16,float16,0,29.98363749186198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,64,64,128,1,float16,float16,0,17.787029266357422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,64,2,128,1,float16,float16,0,14.693023681640625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,fp8,0,2.0819946924845376
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,fp8,0,65.46548970540364
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,fp8,0,2.9649171829223633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,64,8,128,1,float16,float16,0,28.93017578125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,fp8,0,0.896117369333903
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,64,4,128,1,float16,float16,0,14.25002670288086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,fp8,0,0.9497333367665609
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,64,64,128,1,float16,float16,0,7.914165496826172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,64,1,128,1,float16,float16,0,6.688741048177083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,fp8,0,1.0720372994740803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,64,2,128,1,float16,float16,0,6.05293337504069
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,64,8,128,1,float16,float16,0,15.982847849527994
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,64,4,128,1,float16,float16,0,6.820101420084636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,fp8,0,33.791605631510414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,fp8,0,16.97589874267578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,fp8,0,0.41778135299682617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,fp8,0,1.4194879531860352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,fp8,0,0.4681813319524129
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,64,2,128,1,float16,float16,0,2.9979947408040366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,fp8,0,0.5294666687647501
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,64,64,128,1,float16,float16,0,3.6761067708333335
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,64,1,128,1,float16,float16,0,2.8942505518595376
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,64,4,128,1,float16,float16,0,2.7622400919596353
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,64,8,128,1,float16,float16,0,6.3174082438151045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,fp8,0,0.7144532998402914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,fp8,0,0.05798399945100149
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,64,64,128,1,float16,float16,0,1.8934879302978516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,fp8,0,0.0719893326361974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,64,8,128,1,float16,float16,0,3.042938550313314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,fp8,0,0.11934399604797363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,64,1,128,1,float16,float16,0,1.4532426198323567
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,64,2,128,1,float16,float16,0,1.6451147397359211
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,64,4,128,1,float16,float16,0,1.4682559967041016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,64,8,128,1,float16,float16,0,1.4252266883850098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,fp8,0,0.35625600814819336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,fp8,0,7.600858688354492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,fp8,0,3.4502080281575522
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,fp8,0,5.012624104817708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,fp8,0,5.279813448588054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,fp8,0,6.308223724365234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,fp8,0,8.190906524658203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,fp8,0,2.509098688761393
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,64,1,128,1,float16,float16,0,17.078245798746746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,64,64,128,1,float16,float16,0,20.398768107096355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,fp8,0,2.64795192082723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,64,2,128,1,float16,float16,0,16.58687973022461
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,64,1,128,1,float16,float16,0,34.84503428141276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,64,2,128,1,float16,float16,0,35.24818674723307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,64,4,128,1,float16,float16,0,35.52353159586588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,fp8,0,3.007786750793457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,64,8,128,1,float16,float16,0,34.93231964111328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,fp8,0,1.2841493288675945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,fp8,0,3.938927968343099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,fp8,0,1.324623982111613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,64,1,128,1,float16,float16,0,7.838373184204102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,64,64,128,1,float16,float16,0,10.30460294087728
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,64,4,128,1,float16,float16,0,17.284138997395832
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,fp8,0,1.55732266108195
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,64,2,128,1,float16,float16,0,7.193066914876302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,fp8,0,39.601270039876304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,64,4,128,1,float16,float16,0,6.386437098185222
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,64,8,128,1,float16,float16,0,16.903648376464844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,fp8,0,0.6391306718190511
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,fp8,0,2.089786688486735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,fp8,0,0.6610879898071289
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,64,64,128,1,float16,float16,0,4.391567866007487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,fp8,0,0.7515199979146322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,64,1,128,1,float16,float16,0,3.6537014643351235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,64,2,128,1,float16,float16,0,3.2918081283569336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,64,8,128,1,float16,float16,0,7.954453150431315
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,64,4,128,1,float16,float16,0,3.4625867207845054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,fp8,0,19.93679936726888
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,fp8,0,9.36684799194336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,fp8,0,0.9767359892527262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,fp8,0,0.3160266677538554
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,64,2,128,1,float16,float16,0,1.750912030537923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,fp8,0,0.3203306595484416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,64,8,128,1,float16,float16,0,3.5579093297322593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,64,1,128,1,float16,float16,0,1.7271199226379395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,64,64,128,1,float16,float16,0,2.3201866149902344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,fp8,0,0.4485599994659424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,fp8,0,0.35415999094645184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,64,4,128,1,float16,float16,0,1.7725440661112468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,fp8,0,0.03945599993069967
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,64,64,128,1,float16,float16,0,1.14192001024882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,64,8,128,1,float16,float16,0,1.6828053792317708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,fp8,0,0.05161066850026449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,fp8,0,4.260026613871257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,fp8,0,0.08569066723187764
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,64,2,128,1,float16,float16,0,0.8779679934183756
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,64,1,128,1,float16,float16,0,0.8376959959665934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,64,8,128,1,float16,float16,0,0.8620266914367676
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,64,4,128,1,float16,float16,0,0.8926186561584473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,fp8,0,0.14595199624697366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,fp8,0,2.143850644429525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,fp8,0,6.6870988210042315
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,fp8,0,7.814090728759766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,fp8,0,10.159066518147787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,fp8,0,7.553397496541341
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,fp8,0,3.4618453979492188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,64,1,128,1,float16,float16,0,15.904885609944662
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,64,64,128,1,float16,float16,0,20.638880411783855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,64,1,128,1,float16,float16,0,34.952667236328125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,64,4,128,1,float16,float16,0,34.52759552001953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,fp8,0,3.5135574340820312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,64,8,128,1,float16,float16,0,35.33631388346354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,64,2,128,1,float16,float16,0,33.270057678222656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,64,2,128,1,float16,float16,0,16.009552001953125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,fp8,0,4.919797261555989
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,fp8,0,3.978229204813639
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,fp8,0,1.6715946197509766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,64,64,128,1,float16,float16,0,10.202597300211588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,64,1,128,1,float16,float16,0,7.185669581095378
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,fp8,0,1.749824047088623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,64,4,128,1,float16,float16,0,16.622352600097656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,64,2,128,1,float16,float16,0,7.598837534586589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,fp8,0,1.9841119448343914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,fp8,0,39.443092346191406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,64,8,128,1,float16,float16,0,16.418949127197266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,fp8,0,2.5980213483174643
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,fp8,0,0.8496479988098145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,64,8,128,1,float16,float16,0,6.176597595214844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,64,4,128,1,float16,float16,0,7.041626612345378
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,fp8,0,0.8876000245412191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,64,1,128,1,float16,float16,0,2.9430932998657227
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,64,64,128,1,float16,float16,0,4.965882619222005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,64,2,128,1,float16,float16,0,2.9758294423421225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,fp8,0,1.0436800320943196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,fp8,0,1.2861599922180176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,64,4,128,1,float16,float16,0,3.3517173131306968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,64,8,128,1,float16,float16,0,3.081045468648275
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,fp8,0,8.933120091756185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,fp8,0,17.79212824503581
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,fp8,0,0.4500639835993449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,64,4,128,1,float16,float16,0,1.5313280423482258
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,fp8,0,0.4285866816838582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,64,64,128,1,float16,float16,0,2.4424853324890137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,64,1,128,1,float16,float16,0,1.5771199862162273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,64,2,128,1,float16,float16,0,1.5724266370137532
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,fp8,0,0.6069386800130209
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,fp8,0,0.5041919946670532
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,64,8,128,1,float16,float16,0,1.6547733942667644
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,fp8,0,0.04836800197760264
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,64,64,128,1,float16,float16,0,1.1750293572743733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,64,1,128,1,float16,float16,0,0.8251146475474039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,64,2,128,1,float16,float16,0,0.793066660563151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,fp8,0,4.040815989176433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,fp8,0,0.062037333846092224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,fp8,0,0.09131200114885966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,fp8,0,0.2747146685918172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,64,4,128,1,float16,float16,0,0.8156320254007975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,64,8,128,1,float16,float16,0,0.8500266869862875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,fp8,0,1.0672000249226887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,64,64,128,1,float16,float16,0,0.6117546558380127
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,fp8,0,0.03201599915822347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,64,1,128,1,float16,float16,0,0.40034667650858563
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,fp8,0,0.02802666773398717
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,fp8,0,2.1385119756062827
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,fp8,0,0.04601066807905833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,64,2,128,1,float16,float16,0,0.4075253407160441
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,64,4,128,1,float16,float16,0,0.4035733143488566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,fp8,0,0.0746559997399648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,64,8,128,1,float16,float16,0,0.4100746711095174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,fp8,0,5.012495994567871
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,fp8,0,6.2901655832926435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,fp8,0,6.970042546590169
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,fp8,0,5.267834663391113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,64,8,128,1,float16,float16,0,20.705039978027344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,64,4,128,1,float16,float16,0,20.457589467366535
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,64,1,128,1,float16,float16,0,19.519466400146484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,64,2,128,1,float16,float16,0,20.18276850382487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,fp8,0,2.499098618825277
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,fp8,0,2.6396212577819824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,fp8,0,2.9629014333089194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,64,1,128,1,float16,float16,0,9.25542958577474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,64,2,128,1,float16,float16,0,8.742533365885416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,64,64,128,1,float16,float16,0,12.113418579101562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,64,4,128,1,float16,float16,0,8.491978963216146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,fp8,0,3.5469226837158203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,fp8,0,1.2756053606669109
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,64,8,128,1,float16,float16,0,8.638458887736002
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,64,1,128,1,float16,float16,0,3.47542413075765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,64,64,128,1,float16,float16,0,5.9608001708984375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,64,2,128,1,float16,float16,0,3.4816001256306968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,fp8,0,1.5732587178548176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,64,4,128,1,float16,float16,0,3.896026611328125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,fp8,0,1.4141920407613118
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,fp8,0,1.8778719902038574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,64,64,128,1,float16,float16,0,3.076117197672526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,64,8,128,1,float16,float16,0,3.777589480082194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,64,1,128,1,float16,float16,0,1.9954454104105632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,fp8,0,23.34911346435547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,64,2,128,1,float16,float16,0,1.873594601949056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,fp8,0,0.7076906363169352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,fp8,0,0.7307626406351725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,fp8,0,10.645456314086914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,fp8,0,0.6336160103480021
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,64,4,128,1,float16,float16,0,1.9015146891276042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,64,8,128,1,float16,float16,0,1.9777439435323079
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,fp8,0,0.8617386817932129
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,64,1,128,1,float16,float16,0,0.9265706539154053
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,fp8,0,0.3004106680552165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,64,64,128,1,float16,float16,0,1.5706772804260254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,fp8,0,0.30165332555770874
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,64,2,128,1,float16,float16,0,0.9645439783732096
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,fp8,0,0.35849066575368244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,fp8,0,5.398778915405273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,64,4,128,1,float16,float16,0,0.9486292997996012
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,64,64,128,1,float16,float16,0,0.7356853485107422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,64,8,128,1,float16,float16,0,0.9524426460266113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,fp8,0,0.043562665581703186
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,64,4,128,1,float16,float16,0,0.5075360139211019
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,fp8,0,0.4164746602376302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,64,1,128,1,float16,float16,0,0.49478399753570557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,64,2,128,1,float16,float16,0,0.5023519992828369
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,fp8,0,2.756725311279297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,fp8,0,0.10531733433405559
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,fp8,0,0.06437866886456807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,64,8,128,1,float16,float16,0,0.5141066710154215
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,fp8,0,0.03758399933576584
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,fp8,0,0.024170666933059692
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,fp8,0,0.029365333418051403
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,64,2,128,1,float16,float16,0,0.24920000632603964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,64,1,128,1,float16,float16,0,0.2523253361384074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,64,64,128,1,float16,float16,0,0.391269326210022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,64,4,128,1,float16,float16,0,0.25253333648045856
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,fp8,0,0.036917333801587425
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,fp8,0,0.6390560070673624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,fp8,0,1.2996319929758708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,64,8,128,1,float16,float16,0,0.24651199579238892
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,fp8,0,0.062421331803003945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,fp8,0,6.855087916056315
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,fp8,0,9.990341186523438
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,fp8,0,7.302591959635417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,fp8,0,7.8122507731119795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,64,8,128,1,float16,float16,0,20.94873555501302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,64,1,128,1,float16,float16,0,20.271813710530598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,64,1,128,1,float16,float16,0,6.712378819783528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,64,2,128,1,float16,float16,0,19.859914143880207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,fp8,0,3.345221201578776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,64,4,128,1,float16,float16,0,20.315887451171875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,fp8,0,3.53275203704834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,64,64,128,1,float16,float16,0,13.460160573323568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,fp8,0,4.247568130493164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,64,2,128,1,float16,float16,0,8.946773529052734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,64,4,128,1,float16,float16,0,9.48790423075358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,64,8,128,1,float16,float16,0,9.08131217956543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,64,64,128,1,float16,float16,0,6.768431981404622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,fp8,0,1.7821440696716309
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,fp8,0,1.7541386286417644
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,64,1,128,1,float16,float16,0,3.4626986185709634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,64,2,128,1,float16,float16,0,3.604341189066569
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,fp8,0,23.777323404947918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,fp8,0,1.961450735727946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,fp8,0,10.992027282714844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,fp8,0,4.655461311340332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,fp8,0,2.3240639368693032
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,64,4,128,1,float16,float16,0,3.626410802205404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,64,8,128,1,float16,float16,0,3.8091465632120767
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,64,64,128,1,float16,float16,0,3.368186632792155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,fp8,0,0.8967893123626709
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,64,2,128,1,float16,float16,0,1.719333330790202
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,64,1,128,1,float16,float16,0,1.704319953918457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,fp8,0,0.933072010676066
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,fp8,0,0.9618293444315592
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,fp8,0,1.225167989730835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,64,4,128,1,float16,float16,0,1.8124640782674153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,64,8,128,1,float16,float16,0,1.8812479972839355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,64,64,128,1,float16,float16,0,1.6761120160420735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,fp8,0,0.4500693480173747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,64,2,128,1,float16,float16,0,0.8966240088144938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,64,1,128,1,float16,float16,0,0.8828319708506266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,fp8,0,5.639408111572266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,fp8,0,0.4583093325297038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,64,4,128,1,float16,float16,0,0.910319964090983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,fp8,0,2.858352025349935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,fp8,0,0.485861341158549
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,fp8,0,0.5310560067494711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,64,64,128,1,float16,float16,0,0.8535253206888834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,fp8,0,0.05165866514046987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,fp8,0,0.05763733386993408
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,64,2,128,1,float16,float16,0,0.4749973217646281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,64,1,128,1,float16,float16,0,0.4766240119934082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,64,8,128,1,float16,float16,0,0.9383626778920492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,fp8,0,1.4057226181030273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,64,1,128,1,float16,float16,0,0.22207466761271158
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,fp8,0,0.2169920007387797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,64,8,128,1,float16,float16,0,0.5024373531341553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,64,64,128,1,float16,float16,0,0.4152746597925822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,64,4,128,1,float16,float16,0,0.4968746503194173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,fp8,0,0.02827200045188268
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,64,2,128,1,float16,float16,0,0.22544533014297485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,fp8,0,0.029178666571776073
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,fp8,0,0.09974933664004008
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,64,4,128,1,float16,float16,0,0.23402132590611777
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,fp8,0,0.03951466580231985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,64,64,128,1,float16,float16,0,0.15563199917475382
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,fp8,0,0.057205334305763245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,64,8,128,1,float16,float16,0,0.2379466692606608
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,fp8,0,0.65774933497111
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,fp8,0,0.1342080036799113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,64,2,128,1,float16,float16,0,0.12690132856369019
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,64,4,128,1,float16,float16,0,0.13065066933631897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,fp8,0,0.13403733571370444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,64,8,128,1,float16,float16,0,0.12678933143615723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,fp8,0,0.14246400197347006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,64,1,128,1,float16,float16,0,0.12038399775822957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,fp8,0,0.15450666348139444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,fp8,0,0.45349868138631183
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,fp8,0,7.185194651285808
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,fp8,0,7.042629241943359
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,64,2,128,1,float16,float16,0,12.343167622884115
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,64,1,128,1,float16,float16,0,12.708148956298828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,fp8,0,9.341354370117188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,64,8,128,1,float16,float16,0,13.748298645019531
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,64,4,128,1,float16,float16,0,13.802523295084635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,64,1,128,1,float16,float16,0,4.359471956888835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,fp8,0,3.5709546407063804
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,64,64,128,1,float16,float16,0,12.794400533040365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,fp8,0,7.820250829060872
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,fp8,0,16.38805389404297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,64,4,128,1,float16,float16,0,4.838282585144043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,fp8,0,3.927424112955729
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,64,2,128,1,float16,float16,0,4.384954770406087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,fp8,0,3.518224080403646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,64,2,128,1,float16,float16,0,2.1931145985921225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,64,1,128,1,float16,float16,0,2.115989367167155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,fp8,0,8.032000223795572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,fp8,0,1.6649386088053386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,fp8,0,1.7551147143046062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,64,64,128,1,float16,float16,0,6.3924001057942705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,64,8,128,1,float16,float16,0,5.413418451944987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,fp8,0,4.990650812784831
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,64,4,128,1,float16,float16,0,2.372981389363607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,fp8,0,1.9660587310791016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,64,64,128,1,float16,float16,0,3.2214721043904624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,fp8,0,2.3206613858540854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,64,8,128,1,float16,float16,0,2.6874027252197266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,fp8,0,0.8819466431935629
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,64,4,128,1,float16,float16,0,1.1814026832580566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,64,2,128,1,float16,float16,0,1.1118559837341309
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,fp8,0,4.311327934265137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,fp8,0,1.137984037399292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,64,8,128,1,float16,float16,0,1.263919989267985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,fp8,0,0.9593919912974039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,64,64,128,1,float16,float16,0,1.6138453483581543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,fp8,0,0.41653335094451904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,64,1,128,1,float16,float16,0,0.5510613520940145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,fp8,0,0.4297120173772176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,fp8,0,0.848090648651123
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,64,2,128,1,float16,float16,0,0.5726879835128784
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,fp8,0,2.1209707260131836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,64,4,128,1,float16,float16,0,0.6004159847895304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,fp8,0,0.456656018892924
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,64,64,128,1,float16,float16,0,0.8003093401590983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,64,1,128,1,float16,float16,0,1.0696693261464436
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,fp8,0,0.5317493279774984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,64,1,128,1,float16,float16,0,0.29078400135040283
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,64,8,128,1,float16,float16,0,0.6366879940032959
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,fp8,0,0.04717866579691569
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,fp8,0,0.0574186642964681
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,64,2,128,1,float16,float16,0,0.299562672773997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,64,4,128,1,float16,float16,0,0.31733866532643634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,fp8,0,1.0020960172017415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,64,8,128,1,float16,float16,0,0.32731199264526367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,fp8,0,0.0819893330335617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,fp8,0,0.02789866675933202
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,64,1,128,1,float16,float16,0,0.1358506679534912
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,64,64,128,1,float16,float16,0,0.36467734972635907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,fp8,0,0.19774933656056723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,64,2,128,1,float16,float16,0,0.13898666699727377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,fp8,0,0.0373279998699824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,64,4,128,1,float16,float16,0,0.1429333289464315
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,fp8,0,0.029978667696317036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,64,64,128,1,float16,float16,0,0.11745066444079082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,fp8,0,0.047397335370381675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,fp8,0,0.46716264883677167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,64,8,128,1,float16,float16,0,0.14736533164978027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,fp8,0,0.13831999897956848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,64,2,128,1,float16,float16,0,0.07799466451009114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,fp8,0,0.1344319979349772
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,64,1,128,1,float16,float16,0,0.075914666056633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,fp8,0,0.13539200027783713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,64,4,128,1,float16,float16,0,0.08297599852085114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,64,8,128,1,float16,float16,0,0.07886399825414021
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,64,64,128,1,float16,float16,0,0.05144000053405762
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,fp8,0,0.354751984278361
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,fp8,0,0.1478506624698639
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,64,1,128,1,float16,float16,0,0.04517333209514618
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,64,2,128,1,float16,float16,0,0.04417066772778829
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,fp8,0,0.07706666489442189
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,fp8,0,0.16523733735084534
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,fp8,0,0.07750933369000752
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,fp8,0,0.07865066826343536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,fp8,0,0.08351999521255493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,64,8,128,1,float16,float16,0,0.043375998735427856
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,64,4,128,1,float16,float16,0,0.04385066529115041
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,fp8,0,3.5888799031575522
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,64,2,128,1,float16,float16,0,4.075551986694336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,64,1,128,1,float16,float16,0,4.076058705647786
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,fp8,0,3.791269302368164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,fp8,0,3.9725332260131836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,64,8,128,1,float16,float16,0,5.122053464253743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,64,1,128,1,float16,float16,0,1.8452480634053547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,fp8,0,1.7894879976908367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,64,2,128,1,float16,float16,0,1.7750026384989421
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,64,4,128,1,float16,float16,0,4.834682782491048
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,64,64,128,1,float16,float16,0,6.952949523925781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,64,4,128,1,float16,float16,0,1.9597172737121582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,fp8,0,5.037978808085124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,fp8,0,6.761952082316081
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,fp8,0,1.957749366760254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,fp8,0,1.7561492919921875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,64,64,128,1,float16,float16,0,3.217461268107096
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,fp8,0,0.9163466294606527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,64,8,128,1,float16,float16,0,2.2541759808858237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,fp8,0,0.8795733451843262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,64,2,128,1,float16,float16,0,0.8835253715515137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,fp8,0,2.5114293098449707
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,fp8,0,1.023957331975301
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,64,4,128,1,float16,float16,0,1.0327359835306804
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,fp8,0,1.1430826981862385
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,64,64,128,1,float16,float16,0,1.612218697865804
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,64,8,128,1,float16,float16,0,1.1067732969919841
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,fp8,0,3.5561014811197915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,64,1,128,1,float16,float16,0,0.8326506614685059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,64,1,128,1,float16,float16,0,0.4507253170013428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,fp8,0,0.45185601711273193
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,fp8,0,0.46537601947784424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,fp8,0,0.4919573465983073
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,64,2,128,1,float16,float16,0,0.4438026746114095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,64,8,128,1,float16,float16,0,0.5521599849065145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,fp8,0,0.5317173401514689
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,64,4,128,1,float16,float16,0,0.47542933622996014
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,64,1,128,1,float16,float16,0,0.22113066911697388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,64,64,128,1,float16,float16,0,0.8071893056233724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,fp8,0,0.04460800190766653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,fp8,0,0.049695998430252075
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,fp8,0,0.09794132908185323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,64,4,128,1,float16,float16,0,0.23577600717544556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,fp8,0,0.18764267365137735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,64,2,128,1,float16,float16,0,0.22802132368087769
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,fp8,0,0.7967519760131836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,64,64,128,1,float16,float16,0,0.3887786865234375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,64,1,128,1,float16,float16,0,0.09400000174840291
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,64,2,128,1,float16,float16,0,0.09692266583442688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,64,8,128,1,float16,float16,0,0.2791253328323364
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,fp8,0,0.0276853342851003
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,fp8,0,0.029359998802344005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,fp8,0,0.03656533360481262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,64,4,128,1,float16,float16,0,0.10153067111968994
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,64,8,128,1,float16,float16,0,0.10225600004196167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,64,1,128,1,float16,float16,0,0.051311999559402466
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,64,64,128,1,float16,float16,0,0.07973866661389668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,fp8,0,0.3659893274307251
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,fp8,0,0.04251199960708618
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,fp8,0,0.13433067003885904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,fp8,0,0.13498133420944214
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,64,2,128,1,float16,float16,0,0.05268266797065735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,64,4,128,1,float16,float16,0,0.05499200026194254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,fp8,0,0.139082670211792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,fp8,0,1.743029276529948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,64,1,128,1,float16,float16,0,0.03009066730737686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,64,64,128,1,float16,float16,0,0.036464000741640724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,fp8,0,0.1418346663316091
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,64,8,128,1,float16,float16,0,0.055488000313440956
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,fp8,0,0.2999839981396993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,fp8,0,0.07409599920113881
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,64,4,128,1,float16,float16,0,0.031189332405726116
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,fp8,0,0.07624533275763194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,fp8,0,0.07547200222810109
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,64,8,128,1,float16,float16,0,0.031184000273545582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,fp8,0,0.07685866455237071
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,64,64,128,1,float16,float16,0,0.022181332111358643
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,64,1,128,1,float16,float16,0,0.01893866683046023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,fp8,0,0.04419733087221781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,fp8,0,0.04422399898370107
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,64,4,128,1,float16,float16,0,0.019472000499566395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,fp8,0,0.13409066200256348
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,64,2,128,1,float16,float16,0,0.01913600042462349
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,64,2,128,1,float16,float16,0,0.0303413321574529
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,fp8,0,0.044213334719340004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,64,8,128,1,float16,float16,0,0.01921066641807556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,fp8,0,0.04442666471004486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,fp8,0,0.07003733515739441
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,fp8,0,1.7572479248046875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,64,1,128,1,float16,float16,0,1.9251999855041504
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,fp8,0,1.7959520022074382
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,64,2,128,1,float16,float16,0,2.0339840253194175
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,64,4,128,1,float16,float16,0,2.241706689198812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,fp8,0,2.0988693237304688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,64,8,128,1,float16,float16,0,2.5205920537312827
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,64,1,128,1,float16,float16,0,0.9656000137329102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,fp8,0,2.391178607940674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,64,64,128,1,float16,float16,0,3.471205393473307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,fp8,0,0.8904533386230469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,64,2,128,1,float16,float16,0,1.014682690302531
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,fp8,0,3.4276320139567056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,64,4,128,1,float16,float16,0,0.9932746887207031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,64,8,128,1,float16,float16,0,1.1553760369618733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,64,64,128,1,float16,float16,0,1.7518773078918457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,64,1,128,1,float16,float16,0,0.48177599906921387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,fp8,0,1.0158933003743489
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,fp8,0,1.1652373472849529
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,64,2,128,1,float16,float16,0,0.45597867170969647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,fp8,0,0.4155893325805664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,fp8,0,0.46320001284281415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,fp8,0,0.932149330774943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,64,8,128,1,float16,float16,0,0.5883146524429321
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,fp8,0,0.48968533674875897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,64,64,128,1,float16,float16,0,0.8368000189463297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,64,4,128,1,float16,float16,0,0.5212693214416504
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,fp8,0,0.5348106622695923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,fp8,0,1.6087946891784668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,64,1,128,1,float16,float16,0,0.19805334011713663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,64,2,128,1,float16,float16,0,0.20735466480255127
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,fp8,0,0.07786666850248973
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,fp8,0,0.04513599971930186
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,fp8,0,0.052469333012898765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,64,4,128,1,float16,float16,0,0.24261333545049033
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,fp8,0,0.02720000098148982
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,64,1,128,1,float16,float16,0,0.07347733279069264
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,fp8,0,0.181167999903361
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,64,64,128,1,float16,float16,0,0.3591573238372803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,64,2,128,1,float16,float16,0,0.07602666815121968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,fp8,0,0.7593653202056885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,fp8,0,0.02900800108909607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,64,4,128,1,float16,float16,0,0.08000533282756805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,fp8,0,0.03646933287382126
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,64,8,128,1,float16,float16,0,0.2853813370068868
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,fp8,0,0.01978133370478948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,fp8,0,0.097653329372406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,64,1,128,1,float16,float16,0,0.0403413325548172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,64,8,128,1,float16,float16,0,0.08116266628106435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,fp8,0,0.04136000076929728
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,64,2,128,1,float16,float16,0,0.041135999063650765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,fp8,0,0.29496000210444134
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,64,64,128,1,float16,float16,0,0.06509866813818614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,fp8,0,0.020608000457286835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,64,4,128,1,float16,float16,0,0.043381333351135254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,64,8,128,1,float16,float16,0,0.043365334471066795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,fp8,0,0.025018667181332905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,fp8,0,0.02363733450571696
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,fp8,0,0.017477333545684814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,64,1,128,1,float16,float16,0,0.02370133250951767
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,fp8,0,0.01752000053723653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,64,64,128,1,float16,float16,0,0.030202666918436687
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,fp8,0,0.016549333930015564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,64,2,128,1,float16,float16,0,0.023930666347344715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,64,4,128,1,float16,float16,0,0.02495466669400533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,64,8,128,1,float16,float16,0,0.024933333198229473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,fp8,0,0.018112000077962875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,64,1,128,1,float16,float16,0,0.014592000593741735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,64,2,128,1,float16,float16,0,0.014549333602190018
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,fp8,0,0.014303999642531076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,fp8,0,0.01440000037352244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,64,64,128,1,float16,float16,0,0.018624000251293182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,64,4,128,1,float16,float16,0,0.01461333284775416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,fp8,0,0.05287466446558634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,64,1,128,1,float16,float16,0,0.009733333562811216
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,fp8,0,0.014853333433469137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,64,8,128,1,float16,float16,0,0.014661333213249842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,64,64,128,1,float16,float16,0,0.010965333630641302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,fp8,0,0.013514666507641474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,64,2,128,1,float16,float16,0,0.009573333586255709
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,fp8,0,0.013861333330472311
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,64,4,128,1,float16,float16,0,0.009685333197315535
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,fp8,0,0.01351999988158544
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,fp8,0,0.02874133239189784
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,fp8,0,0.014592000593741735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,64,8,128,1,float16,float16,0,0.009818666925032934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,fp8,0,0.013605333864688873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,fp8,0,0.02051199972629547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,64,4,128,1,float16,float16,0,1.0210879643758137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,64,2,128,1,float16,float16,0,0.9400266806284586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,64,1,128,1,float16,float16,0,0.9612533251444498
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,64,8,128,1,float16,float16,0,1.1827893257141113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,64,64,128,1,float16,float16,0,1.6398773193359375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,64,1,128,1,float16,float16,0,0.44527467091878253
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,fp8,0,0.8500213623046875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,fp8,0,0.8858400185902914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,fp8,0,1.2492106755574544
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,fp8,0,0.9612159729003906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,64,2,128,1,float16,float16,0,0.4997493426005046
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,fp8,0,1.6190400123596191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,64,4,128,1,float16,float16,0,0.5018293460210165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,fp8,0,0.4552533229192098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,64,64,128,1,float16,float16,0,0.8585920333862305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,64,1,128,1,float16,float16,0,0.20046399037043253
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,fp8,0,0.416650652885437
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,fp8,0,0.043925335009892784
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,fp8,0,0.42670400937398273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,fp8,0,0.54038933912913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,fp8,0,0.05159999926884969
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,fp8,0,0.07410133381684621
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,64,8,128,1,float16,float16,0,0.5916106700897217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,64,4,128,1,float16,float16,0,0.22747733195622763
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,64,64,128,1,float16,float16,0,0.35974931716918945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,64,8,128,1,float16,float16,0,0.26309865713119507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,fp8,0,0.6990133126576742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,fp8,0,0.19034665822982788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,fp8,0,0.027274665733178455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,64,8,128,1,float16,float16,0,0.07578133543332417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,fp8,0,0.2840320070584615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,64,2,128,1,float16,float16,0,0.2233333388964335
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,fp8,0,0.02889599899450938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,fp8,0,0.036101333796978
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,64,1,128,1,float16,float16,0,0.06913599868615468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,fp8,0,0.041434665520985924
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,64,2,128,1,float16,float16,0,0.07067200044790904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,64,1,128,1,float16,float16,0,0.03826133410135905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,fp8,0,0.0198186660806338
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,64,2,128,1,float16,float16,0,0.038906666139761605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,fp8,0,0.0206986665725708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,64,8,128,1,float16,float16,0,0.041077333192030586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,fp8,0,0.023589332898457844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,64,64,128,1,float16,float16,0,0.028149334092934925
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,fp8,0,0.025050667424996693
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,64,64,128,1,float16,float16,0,0.060773332913716636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,64,4,128,1,float16,float16,0,0.0408693328499794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,64,1,128,1,float16,float16,0,0.021776000658671062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,fp8,0,0.016293333222468693
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,64,2,128,1,float16,float16,0,0.021856000026067097
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,fp8,0,0.017525333911180496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,64,4,128,1,float16,float16,0,0.022976001103719074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,64,8,128,1,float16,float16,0,0.023215999205907185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,fp8,0,0.07204799850781758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,64,4,128,1,float16,float16,0,0.07453333338101704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,fp8,0,0.018138666947682697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,fp8,0,0.03896533449490865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,64,64,128,1,float16,float16,0,0.0162773331006368
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,64,1,128,1,float16,float16,0,0.013525333255529404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,fp8,0,0.016250666230916977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,64,8,128,1,float16,float16,0,0.013722666849692663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,fp8,0,0.014416000495354334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,fp8,0,0.014416000495354334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,fp8,0,0.014554666976133982
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,64,64,128,1,float16,float16,0,0.01073066641887029
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,64,4,128,1,float16,float16,0,0.013653332988421122
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,64,2,128,1,float16,float16,0,0.013365333278973898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,64,2,128,1,float16,float16,0,0.009253333633144697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,64,1,128,1,float16,float16,0,0.009290666629870733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,fp8,0,0.022309333086013794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,64,4,128,1,float16,float16,0,0.009232000137368837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,fp8,0,0.013290667285521826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,fp8,0,0.01423466702302297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,64,8,128,1,float16,float16,0,0.009354666496316591
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,fp8,0,0.013354666531085968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,64,1,128,1,float16,float16,0,0.008650666723648706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,64,2,128,1,float16,float16,0,0.008703999842206636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,fp8,0,0.016751999656359356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,fp8,0,0.013546666751305262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,64,4,128,1,float16,float16,0,0.008661333471536636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,64,8,128,1,float16,float16,0,0.00878399983048439
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,64,64,128,1,float16,float16,0,0.013104000439246496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,fp8,0,0.016037333756685257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,64,1,128,1,float16,float16,0,0.5118506749471029
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,fp8,0,0.45005865891774494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,64,2,128,1,float16,float16,0,0.5327733357747396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,fp8,0,0.6168959935506185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,fp8,0,0.4707520008087158
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,64,4,128,1,float16,float16,0,0.5874826510747274
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,64,1,128,1,float16,float16,0,0.2625439961751302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,fp8,0,0.5148053169250488
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,64,64,128,1,float16,float16,0,0.8022879759470621
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,64,8,128,1,float16,float16,0,0.6382453441619873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,64,4,128,1,float16,float16,0,0.28453866640726727
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,fp8,0,0.8031893571217855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,fp8,0,0.06445866823196411
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,64,2,128,1,float16,float16,0,0.2767360011736552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,fp8,0,0.09802666306495667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,fp8,0,0.24169067541758218
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,64,64,128,1,float16,float16,0,0.36739734808603924
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,64,8,128,1,float16,float16,0,0.3139626582463582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,64,2,128,1,float16,float16,0,0.12126400073369344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,64,1,128,1,float16,float16,0,0.11924266815185547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,fp8,0,0.03124266614516576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,fp8,0,0.14879467089970908
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,fp8,0,0.04164800047874451
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,64,8,128,1,float16,float16,0,0.12550933162371317
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,fp8,0,0.3715680042902629
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,fp8,0,0.056554665168126426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,fp8,0,0.03363200028737386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,64,4,128,1,float16,float16,0,0.12480533123016357
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,64,64,128,1,float16,float16,0,0.09778666496276855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,64,4,128,1,float16,float16,0,0.06541333099206288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,fp8,0,0.024533333877722423
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,64,1,128,1,float16,float16,0,0.0629120022058487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,64,2,128,1,float16,float16,0,0.0641653339068095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,fp8,0,0.12731732924779257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,64,8,128,1,float16,float16,0,0.06534400085608165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,fp8,0,0.030266667405764263
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,fp8,0,0.02779199928045273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,64,1,128,1,float16,float16,0,0.03533333291610082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,64,64,128,1,float16,float16,0,0.041589332123597465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,64,2,128,1,float16,float16,0,0.03541333228349686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,fp8,0,0.023242667317390442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,64,4,128,1,float16,float16,0,0.03629333277543386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,fp8,0,0.019808000574509304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,64,8,128,1,float16,float16,0,0.03643733263015747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,fp8,0,0.021365332106749218
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,fp8,0,0.020682666450738907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,fp8,0,0.022053333620230358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,64,64,128,1,float16,float16,0,0.023226665953795116
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,64,2,128,1,float16,float16,0,0.020165332903464634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,64,1,128,1,float16,float16,0,0.020080000162124634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,fp8,0,0.06763199965159099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,fp8,0,0.01929066702723503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,fp8,0,0.018789333601792652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,64,4,128,1,float16,float16,0,0.020021333048741024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,64,8,128,1,float16,float16,0,0.01998399943113327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,fp8,0,0.018191999445358913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,fp8,0,0.018405333161354065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,fp8,0,0.01758933315674464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,64,2,128,1,float16,float16,0,0.012229333321253458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,fp8,0,0.03819733361403147
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,fp8,0,0.017818666994571686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,64,1,128,1,float16,float16,0,0.012309333930412928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,64,4,128,1,float16,float16,0,0.012351999680201212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,64,64,128,1,float16,float16,0,0.013690666606028875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,64,8,128,1,float16,float16,0,0.012383999923865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,fp8,0,0.017770666629076004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,64,64,128,1,float16,float16,0,0.009269333134094873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,64,1,128,1,float16,float16,0,0.008549333239595095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,64,2,128,1,float16,float16,0,0.008453333129485449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,64,4,128,1,float16,float16,0,0.008623999853928884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,fp8,0,0.01762666677435239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,fp8,0,0.022613334159056347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,fp8,0,0.017504000415404636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,fp8,0,0.020053333292404812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,64,8,128,1,float16,float16,0,0.008527999743819237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,64,64,128,1,float16,float16,0,0.008816000074148178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,fp8,0,0.017850667238235474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,fp8,0,0.01691199963291486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,fp8,0,0.016821333517630894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,fp8,0,0.017344000438849132
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,64,4,128,1,float16,float16,0,0.008197333042820295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,64,8,128,1,float16,float16,0,0.010351999973257383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,64,1,128,1,float16,float16,0,0.008325333396593729
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,64,2,128,1,float16,float16,0,0.008122666428486506
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,fp8,0,0.019695999721686046
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,fp8,0,0.06898133456707001
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,64,1,128,1,float16,float16,0,0.4691946506500244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,fp8,0,0.28751466671625775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,fp8,0,0.12273599704106648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,64,4,128,1,float16,float16,0,0.49378132820129395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,64,2,128,1,float16,float16,0,0.4789439837137858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,64,8,128,1,float16,float16,0,0.5078666607538859
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,fp8,0,0.1887306571006775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,64,64,128,1,float16,float16,0,0.41934935251871747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,64,2,128,1,float16,float16,0,0.22444800535837808
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,fp8,0,0.5127520163853964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,fp8,0,0.031583999594052635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,fp8,0,0.0354720006386439
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,64,4,128,1,float16,float16,0,0.22722133000691733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,64,1,128,1,float16,float16,0,0.223088006178538
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,fp8,0,0.05562133093674978
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,fp8,0,0.08161599934101105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,fp8,0,0.02476266771554947
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,fp8,0,0.023168000082174938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,64,2,128,1,float16,float16,0,0.11373866597811381
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,64,1,128,1,float16,float16,0,0.1123306651910146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,64,8,128,1,float16,float16,0,0.22947200139363608
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,fp8,0,0.029477333029111225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,64,8,128,1,float16,float16,0,0.11640000343322754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,64,4,128,1,float16,float16,0,0.1153706709543864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,fp8,0,0.19924267133076987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,64,64,128,1,float16,float16,0,0.06675733129183452
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,fp8,0,0.019626667102177937
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,64,64,128,1,float16,float16,0,0.14342400431632996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,fp8,0,0.04458666841189066
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,64,1,128,1,float16,float16,0,0.060640002290407814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,64,4,128,1,float16,float16,0,0.06030400097370148
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,64,2,128,1,float16,float16,0,0.05932266513506571
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,64,64,128,1,float16,float16,0,0.036320000886917114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,fp8,0,0.10493866602579753
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,fp8,0,0.024522667129834492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,64,8,128,1,float16,float16,0,0.06066666543483734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,64,2,128,1,float16,float16,0,0.0340693344672521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,fp8,0,0.020479999482631683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,64,1,128,1,float16,float16,0,0.03352533280849457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,fp8,0,0.018730666488409042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,64,8,128,1,float16,float16,0,0.033674667278925575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,fp8,0,0.022437334060668945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,64,4,128,1,float16,float16,0,0.03417599946260452
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,fp8,0,0.017973333597183228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,fp8,0,0.018874666343132656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,64,64,128,1,float16,float16,0,0.021509334444999695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,fp8,0,0.01887999971707662
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,fp8,0,0.058730666836102806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,fp8,0,0.018394666413466137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,64,2,128,1,float16,float16,0,0.020314666132132213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,fp8,0,0.01894933357834816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,fp8,0,0.034645333886146545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,64,4,128,1,float16,float16,0,0.019679999599854153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,64,1,128,1,float16,float16,0,0.019653332730134327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,64,2,128,1,float16,float16,0,0.01190399999419848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,64,8,128,1,float16,float16,0,0.019653332730134327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,fp8,0,0.017887999614079792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,64,4,128,1,float16,float16,0,0.011850666254758835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,fp8,0,0.01786133274435997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,64,1,128,1,float16,float16,0,0.011722666521867117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,fp8,0,0.017781333376963932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,64,64,128,1,float16,float16,0,0.013141332815090815
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,fp8,0,0.01802666609485944
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,64,8,128,1,float16,float16,0,0.0124746672809124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,fp8,0,0.021477334201335907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,64,1,128,1,float16,float16,0,0.010421333213647207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,64,2,128,1,float16,float16,0,0.008080000057816505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,fp8,0,0.01695999999841054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,fp8,0,0.01985599969824155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,fp8,0,0.01825599993268649
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,64,64,128,1,float16,float16,0,0.013493333011865616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,fp8,0,0.017749333133300144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,64,4,128,1,float16,float16,0,0.008303999900817871
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,64,8,128,1,float16,float16,0,0.010351999973257383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,fp8,0,0.017909333109855652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,64,2,128,1,float16,float16,0,0.008058666562040647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,fp8,0,0.016783999900023144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,fp8,0,0.018021332720915478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,64,1,128,1,float16,float16,0,0.013424000392357508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,fp8,0,0.01749333366751671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,64,64,128,1,float16,float16,0,0.010586666564146677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,64,4,128,1,float16,float16,0,0.010485333700974783
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,64,8,128,1,float16,float16,0,0.010362666721145311
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,fp8,0,0.017759999881188076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,fp8,0,0.02011200040578842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,64,2,128,1,float16,float16,0,0.03126933425664902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,64,1,128,1,float16,fp8,0,0.02042666698495547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,64,2,128,1,float16,fp8,0,0.026842666169007618
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,0,0.03827200084924698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,64,4,128,1,float16,float16,0,0.044677332043647766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,64,1,128,1,float16,float16,0,0.015642666568358738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,0,0.05779733260472616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,64,8,128,1,float16,float16,0,0.06916800141334534
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,64,64,128,1,float16,float16,0,0.20094400644302368
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,64,1,128,1,float16,fp8,0,0.01492799942692121
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,64,2,128,1,float16,float16,0,0.019941333681344986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,64,1,128,1,float16,float16,0,0.010687999427318573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,64,2,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,0,0.03465066601832708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,fp8,0,0.17362133661905924
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,64,4,128,1,float16,float16,0,0.02701333413521449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,0,0.02309866746266683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,64,64,128,1,float16,float16,0,0.10282133022944133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,64,1,128,1,float16,float16,0,0.009653333574533463
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,64,8,128,1,float16,float16,0,0.04062400013208389
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,fp8,0,0.09345066547393799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,64,1,128,1,float16,fp8,0,0.013199999928474426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,64,8,128,1,float16,float16,0,0.024864000578721363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,64,4,128,1,float16,float16,0,0.017925333231687546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,0,0.01568000018596649
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,64,2,128,1,float16,fp8,0,0.01340266689658165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,64,64,128,1,float16,float16,0,0.055344000458717346
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,0,0.021557333568731945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,64,1,128,1,float16,float16,0,0.009519999846816063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,64,1,128,1,float16,fp8,0,0.012554666648308435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,64,2,128,1,float16,float16,0,0.014549333602190018
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,64,2,128,1,float16,float16,0,0.013397333522637686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,64,2,128,1,float16,fp8,0,0.012341332932313284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,64,4,128,1,float16,float16,0,0.01357866699496905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,0,0.01471466695268949
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,fp8,0,0.03458133339881897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,0,0.01258133351802826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,fp8,0,0.05387733379999796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,64,1,128,1,float16,float16,0,0.009365333244204521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,64,8,128,1,float16,float16,0,0.017157333592573803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,64,1,128,1,float16,fp8,0,0.011999999483426413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,64,2,128,1,float16,float16,0,0.012944000462690989
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,64,2,128,1,float16,fp8,0,0.011834666132926941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,64,4,128,1,float16,float16,0,0.012991999586423239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,64,8,128,1,float16,float16,0,0.01310933381319046
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,0,0.011941333611806234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,64,64,128,1,float16,float16,0,0.03259200106064478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,64,64,128,1,float16,float16,0,0.01846933364868164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,64,1,128,1,float16,float16,0,0.009189333145817121
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,0,0.013989333063364029
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,64,1,128,1,float16,fp8,0,0.01139733319481214
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,64,4,128,1,float16,float16,0,0.012805332740147909
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,0,0.011578666667143503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,64,8,128,1,float16,float16,0,0.01292266696691513
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,0,0.013546666751305262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,64,64,128,1,float16,float16,0,0.01173866664369901
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,64,2,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,64,1,128,1,float16,float16,0,0.009232000137368837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,64,2,128,1,float16,float16,0,0.012837332983811697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,64,1,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,64,2,128,1,float16,float16,0,0.012448000411192576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,fp8,0,0.023946667710940044
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,64,4,128,1,float16,float16,0,0.01249066616098086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,64,8,128,1,float16,float16,0,0.012378666549921036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,64,64,128,1,float16,float16,0,0.008517333616813024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,0,0.01126933346192042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,64,2,128,1,float16,fp8,0,0.011968000481526056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,64,1,128,1,float16,float16,0,0.008805333326260248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,64,1,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,64,2,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,64,2,128,1,float16,float16,0,0.01198400060335795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,fp8,0,0.0194560003777345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,0,0.011087999989589056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,64,8,128,1,float16,float16,0,0.012106666962305704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,64,4,128,1,float16,float16,0,0.012624000509579977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,64,64,128,1,float16,float16,0,0.008277333031098047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,64,1,128,1,float16,float16,0,0.008618666479984919
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,0,0.011541333049535751
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,64,1,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,fp8,0,0.017877332866191864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,64,4,128,1,float16,float16,0,0.008639999975760778
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,64,8,128,1,float16,float16,0,0.008650666723648706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,64,2,128,1,float16,fp8,0,0.010784000158309937
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,64,2,128,1,float16,float16,0,0.010314666976531347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,0,0.011333333949247995
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,fp8,0,0.016480000068744022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,fp8,0,5.329317410786946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,fp8,0,6.365658442179362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,fp8,0,9.740559895833334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,fp8,0,23.418441772460938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,fp8,0,2.6986827850341797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,48,1,128,1,float16,float16,0,79.8319091796875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,48,48,128,1,float16,float16,0,85.22336832682292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,fp8,0,3.2360054651896157
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,fp8,0,4.795743942260742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,48,2,128,1,float16,float16,0,82.18102518717448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,fp8,0,11.649883270263672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,48,2,128,1,float16,float16,0,162.13993326822916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,48,1,128,1,float16,float16,0,162.08389282226562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,48,4,128,1,float16,float16,0,162.1692097981771
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,fp8,0,1.3594133059183757
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,48,8,128,1,float16,float16,0,166.12708536783853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,48,48,128,1,float16,float16,0,41.87412770589193
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,fp8,0,1.682794729868571
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,fp8,0,2.5794612566630044
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,48,4,128,1,float16,float16,0,83.45771789550781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,48,8,128,1,float16,float16,0,81.82769775390625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,fp8,0,5.661269505818685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,fp8,0,177.01861572265625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,48,1,128,1,float16,float16,0,40.35096995035807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,fp8,0,0.6737333138783773
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,48,48,128,1,float16,float16,0,20.83021291097005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,48,2,128,1,float16,float16,0,40.14642588297526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,fp8,0,0.8285439809163412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,fp8,0,1.299733320871989
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,48,1,128,1,float16,float16,0,21.588783264160156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,48,4,128,1,float16,float16,0,42.854390462239586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,fp8,0,2.8442986806233725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,48,8,128,1,float16,float16,0,39.934549967447914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,fp8,0,4.246111869812012
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,48,2,128,1,float16,float16,0,19.37713623046875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,48,4,128,1,float16,float16,0,19.496517181396484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,48,8,128,1,float16,float16,0,19.801520029703777
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,fp8,0,4.5836747487386065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,fp8,0,42.983601888020836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,fp8,0,6.471397399902344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,fp8,0,13.939620971679688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,fp8,0,85.56885782877605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,fp8,0,2.0149760246276855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,48,48,128,1,float16,float16,0,49.90913391113281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,48,1,128,1,float16,float16,0,44.803314208984375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,fp8,0,2.309962590535482
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,48,2,128,1,float16,float16,0,45.60010274251302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,fp8,0,3.285680135091146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,48,1,128,1,float16,float16,0,93.08253987630208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,48,2,128,1,float16,float16,0,94.27693684895833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,fp8,0,6.981285095214844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,48,4,128,1,float16,float16,0,92.33387247721355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,fp8,0,1.0050293604532878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,48,8,128,1,float16,float16,0,93.28048706054688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,fp8,0,1.1975733439127605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,48,48,128,1,float16,float16,0,23.895413716634113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,fp8,0,1.7784372965494792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,fp8,0,100.06578572591145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,48,1,128,1,float16,float16,0,23.247962951660156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,48,2,128,1,float16,float16,0,22.071983337402344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,fp8,0,3.525989214579264
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,48,4,128,1,float16,float16,0,47.37432352701823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,fp8,0,0.5393706560134888
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,48,4,128,1,float16,float16,0,23.13905080159505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,fp8,0,0.6089866558710734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,48,8,128,1,float16,float16,0,46.385589599609375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,fp8,0,0.9101653099060059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,48,48,128,1,float16,float16,0,11.681563059488932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,48,1,128,1,float16,float16,0,11.169813791910807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,fp8,0,1.863701343536377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,48,2,128,1,float16,float16,0,11.593045552571615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,48,8,128,1,float16,float16,0,24.273541768391926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,fp8,0,3.301205317179362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,48,4,128,1,float16,float16,0,11.427407582600912
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,48,8,128,1,float16,float16,0,11.215296427408854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,fp8,0,3.742975870768229
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,fp8,0,5.090928077697754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,fp8,0,25.111712137858074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,fp8,0,51.33695983886719
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,fp8,0,10.136751810709635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,fp8,0,1.66212797164917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,48,48,128,1,float16,float16,0,34.00741322835287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,48,1,128,1,float16,float16,0,33.77123260498047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,fp8,0,1.8957653045654297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,fp8,0,2.5951627095540366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,48,2,128,1,float16,float16,0,30.993306477864582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,fp8,0,5.223301251729329
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,48,1,128,1,float16,float16,0,65.55573018391927
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,48,2,128,1,float16,float16,0,67.74428304036458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,48,4,128,1,float16,float16,0,64.46106974283855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,fp8,0,0.8956693013509115
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,48,8,128,1,float16,float16,0,64.57796732584636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,fp8,0,0.9584533373514811
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,48,48,128,1,float16,float16,0,16.708260854085285
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,48,4,128,1,float16,float16,0,32.840660095214844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,fp8,0,1.3851146697998047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,48,1,128,1,float16,float16,0,16.386464436848957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,48,8,128,1,float16,float16,0,32.65087381998698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,fp8,0,2.606133302052816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,fp8,0,70.11274719238281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,fp8,0,0.42320533593495685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,48,2,128,1,float16,float16,0,18.12817128499349
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,48,4,128,1,float16,float16,0,15.91259765625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,fp8,0,0.5453759829203287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,fp8,0,0.7062880198160807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,48,48,128,1,float16,float16,0,8.13476816813151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,48,1,128,1,float16,float16,0,7.772997538248698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,48,2,128,1,float16,float16,0,8.043930689493815
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,fp8,0,1.430064042409261
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,48,4,128,1,float16,float16,0,7.510234832763672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,48,8,128,1,float16,float16,0,16.54095967610677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,fp8,0,34.22746022542318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,48,8,128,1,float16,float16,0,7.396816253662109
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,fp8,0,5.249866803487142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,fp8,0,17.667994181315105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,fp8,0,5.870288213094075
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,fp8,0,7.664992014567058
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,fp8,0,14.0688107808431
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,fp8,0,2.636202653249105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,48,1,128,1,float16,float16,0,42.66135915120443
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,48,48,128,1,float16,float16,0,45.929219563802086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,fp8,0,2.9518772761027017
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,fp8,0,3.907146771748861
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,48,2,128,1,float16,float16,0,40.976155598958336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,fp8,0,7.210320154825847
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,48,1,128,1,float16,float16,0,86.64901733398438
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,48,2,128,1,float16,float16,0,90.03640747070312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,48,4,128,1,float16,float16,0,89.2454325358073
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,48,8,128,1,float16,float16,0,90.55471801757812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,fp8,0,1.4179040590922039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,fp8,0,1.555557409922282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,48,48,128,1,float16,float16,0,23.777498881022137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,48,4,128,1,float16,float16,0,44.477508544921875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,fp8,0,2.03603728612264
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,fp8,0,92.3230489095052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,48,8,128,1,float16,float16,0,41.483744303385414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,fp8,0,3.5821812947591147
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,48,1,128,1,float16,float16,0,20.367685953776043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,fp8,0,0.7128853003184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,48,48,128,1,float16,float16,0,10.680245717366537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,48,2,128,1,float16,float16,0,21.176442464192707
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,fp8,0,0.797648032506307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,48,1,128,1,float16,float16,0,10.11463483174642
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,48,4,128,1,float16,float16,0,20.601871490478516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,fp8,0,1.0386186440785725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,48,2,128,1,float16,float16,0,10.659861246744791
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,48,8,128,1,float16,float16,0,21.10667673746745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,fp8,0,1.8492320378621419
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,48,4,128,1,float16,float16,0,10.16330655415853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,48,1,128,1,float16,float16,0,3.996474583943685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,48,8,128,1,float16,float16,0,9.418901443481445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,fp8,0,0.33607999483744305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,48,48,128,1,float16,float16,0,5.130154609680176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,fp8,0,0.3966826597849528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,fp8,0,0.5490773518880209
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,fp8,0,22.341641743977863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,fp8,0,1.0023679733276367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,fp8,0,44.68158976236979
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,48,2,128,1,float16,float16,0,4.710858662923177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,48,4,128,1,float16,float16,0,4.2359574635823565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,48,8,128,1,float16,float16,0,4.483279863993327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,fp8,0,10.838799794514975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,fp8,0,3.8390773137410483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,fp8,0,4.5572052001953125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,fp8,0,5.458645502726237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,fp8,0,9.37559445699056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,fp8,0,1.9356533686319988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,48,1,128,1,float16,float16,0,23.595184326171875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,48,48,128,1,float16,float16,0,26.05615488688151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,fp8,0,2.165013313293457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,fp8,0,2.75765323638916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,48,2,128,1,float16,float16,0,24.162996927897137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,48,1,128,1,float16,float16,0,48.629740397135414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,fp8,0,4.670901298522949
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,48,2,128,1,float16,float16,0,51.52964782714844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,48,4,128,1,float16,float16,0,51.46629333496094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,48,8,128,1,float16,float16,0,50.0716807047526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,fp8,0,1.056997299194336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,fp8,0,1.146239995956421
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,48,4,128,1,float16,float16,0,23.823033650716145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,48,48,128,1,float16,float16,0,13.09774398803711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,fp8,0,1.4674347241719563
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,48,1,128,1,float16,float16,0,11.64022445678711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,fp8,0,54.27557373046875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,48,8,128,1,float16,float16,0,24.51250712076823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,fp8,0,2.3670506477355957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,48,2,128,1,float16,float16,0,11.509408315022787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,fp8,0,0.4838026762008667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,48,4,128,1,float16,float16,0,11.737509409586588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,fp8,0,0.5351093212763468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,fp8,0,0.6879519621531168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,48,48,128,1,float16,float16,0,6.098165512084961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,48,2,128,1,float16,float16,0,5.45738156636556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,48,1,128,1,float16,float16,0,4.914304097493489
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,fp8,0,1.2317972977956135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,48,8,128,1,float16,float16,0,12.070335388183594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,48,4,128,1,float16,float16,0,5.855445226033528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,fp8,0,0.101200004418691
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,48,8,128,1,float16,float16,0,5.233157475789388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,fp8,0,0.14913599689801535
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,48,48,128,1,float16,float16,0,2.9387200673421225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,48,1,128,1,float16,float16,0,2.279786745707194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,fp8,0,0.34058133761088055
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,48,2,128,1,float16,float16,0,2.5982666015625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,fp8,0,26.58587646484375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,48,4,128,1,float16,float16,0,2.6901334126790366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,fp8,0,0.6402613321940104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,fp8,0,13.446085611979166
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,48,8,128,1,float16,float16,0,2.32969601949056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,fp8,0,5.493328094482422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,fp8,0,5.106101353963216
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,fp8,0,5.59995714823405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,fp8,0,6.758719762166341
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,fp8,0,10.436794916788736
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,fp8,0,2.560293356577555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,48,1,128,1,float16,float16,0,21.87269337972005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,48,48,128,1,float16,float16,0,25.576494852701824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,fp8,0,2.822117487589518
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,fp8,0,3.425146738688151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,48,2,128,1,float16,float16,0,22.39134470621745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,48,1,128,1,float16,float16,0,44.623616536458336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,48,4,128,1,float16,float16,0,46.814239501953125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,48,2,128,1,float16,float16,0,48.20807393391927
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,fp8,0,5.190117200215657
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,48,8,128,1,float16,float16,0,46.9789072672526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,fp8,0,1.370789368947347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,fp8,0,1.4807626406351726
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,48,4,128,1,float16,float16,0,23.1012700398763
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,48,48,128,1,float16,float16,0,12.322736104329428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,fp8,0,48.54637654622396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,48,1,128,1,float16,float16,0,10.784037272135416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,fp8,0,1.819167931874593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,48,8,128,1,float16,float16,0,22.56530253092448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,48,2,128,1,float16,float16,0,11.376885732014975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,fp8,0,2.757253328959147
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,fp8,0,0.6927039623260498
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,48,4,128,1,float16,float16,0,10.208778381347656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,fp8,0,0.7432373364766439
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,48,48,128,1,float16,float16,0,5.649525324503581
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,48,1,128,1,float16,float16,0,4.979541460673015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,48,2,128,1,float16,float16,0,4.090986569722493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,fp8,0,0.8773386478424072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,48,8,128,1,float16,float16,0,10.917349497477213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,fp8,0,1.3796000480651855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,48,4,128,1,float16,float16,0,4.719013214111328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,48,1,128,1,float16,float16,0,2.4440693855285645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,48,48,128,1,float16,float16,0,2.9294614791870117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,fp8,0,0.3201333284378052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,48,8,128,1,float16,float16,0,4.8813174565633135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,fp8,0,0.36028265953063965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,fp8,0,0.46347200870513916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,fp8,0,26.2328364054362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,48,2,128,1,float16,float16,0,2.3222079277038574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,fp8,0,12.301013946533203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,48,4,128,1,float16,float16,0,2.1266345977783203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,fp8,0,0.6623093287150065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,fp8,0,5.35540771484375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,fp8,0,0.046469335754712425
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,48,8,128,1,float16,float16,0,2.2599733670552573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,48,48,128,1,float16,float16,0,1.421866734822591
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,fp8,0,0.06750933329264323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,48,2,128,1,float16,float16,0,1.0989920298258464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,fp8,0,0.1308693289756775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,48,4,128,1,float16,float16,0,1.0850719610850017
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,48,1,128,1,float16,float16,0,1.0834346612294514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,fp8,0,0.27774399518966675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,48,8,128,1,float16,float16,0,1.098629315694173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,fp8,0,2.855034510294596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,fp8,0,4.114255905151367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,fp8,0,5.162144025166829
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,fp8,0,4.406943957010905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,48,2,128,1,float16,float16,0,26.64544423421224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,48,8,128,1,float16,float16,0,27.502891540527344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,fp8,0,2.0558560689290366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,fp8,0,2.0542453130086265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,fp8,0,7.2681223551432295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,48,2,128,1,float16,float16,0,12.680741628011068
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,fp8,0,2.4748533566792807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,48,1,128,1,float16,float16,0,12.671887715657553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,48,48,128,1,float16,float16,0,15.320869445800781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,fp8,0,3.69268798828125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,48,1,128,1,float16,float16,0,25.83656056722005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,48,4,128,1,float16,float16,0,26.7433598836263
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,48,4,128,1,float16,float16,0,13.228341420491537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,fp8,0,0.9789013067881266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,fp8,0,1.1201866467793782
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,48,48,128,1,float16,float16,0,6.778794606526692
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,48,1,128,1,float16,float16,0,5.071125348409017
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,fp8,0,1.2345013618469238
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,48,8,128,1,float16,float16,0,13.25490697224935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,48,2,128,1,float16,float16,0,5.514021555582683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,fp8,0,1.902341365814209
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,48,4,128,1,float16,float16,0,4.860997200012207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,48,48,128,1,float16,float16,0,3.378517468770345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,fp8,0,0.5133333206176758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,48,8,128,1,float16,float16,0,5.6777598063151045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,48,1,128,1,float16,float16,0,2.7671467463175454
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,fp8,0,0.5350133180618286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,48,2,128,1,float16,float16,0,2.8124160766601562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,fp8,0,0.6447039842605591
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,fp8,0,14.656511942545572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,fp8,0,0.9311306476593018
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,48,8,128,1,float16,float16,0,2.6217759450276694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,48,4,128,1,float16,float16,0,2.449295997619629
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,fp8,0,7.016496022542317
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,fp8,0,0.12611200412114462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,48,48,128,1,float16,float16,0,1.720021406809489
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,fp8,0,29.04723612467448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,fp8,0,0.10095466176668803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,48,1,128,1,float16,float16,0,1.322533369064331
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,fp8,0,0.2731893261273702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,48,2,128,1,float16,float16,0,1.2786506811777751
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,48,4,128,1,float16,float16,0,1.2969066301981609
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,48,8,128,1,float16,float16,0,1.2697813510894775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,fp8,0,0.03676799933115641
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,48,48,128,1,float16,float16,0,0.8891786734263102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,48,1,128,1,float16,float16,0,0.6296319961547852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,fp8,0,3.253664016723633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,48,2,128,1,float16,float16,0,0.66212264696757
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,fp8,0,0.04629333317279816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,48,4,128,1,float16,float16,0,0.6308266719182333
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,fp8,0,0.08405866225560506
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,48,8,128,1,float16,float16,0,0.6667199929555258
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,fp8,0,0.18664000431696573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,fp8,0,1.5606773694356282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,fp8,0,0.4329013427098592
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,fp8,0,5.095568021138509
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,fp8,0,6.288485209147136
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,fp8,0,5.877797444661458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,fp8,0,9.038810729980469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,fp8,0,2.5451040267944336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,48,1,128,1,float16,float16,0,8.71721076965332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,48,48,128,1,float16,float16,0,14.901167551676432
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,48,1,128,1,float16,float16,0,25.47752634684245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,48,4,128,1,float16,float16,0,25.711087544759113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,48,2,128,1,float16,float16,0,25.572458902994793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,48,8,128,1,float16,float16,0,27.403653462727863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,fp8,0,2.8047574361165366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,48,2,128,1,float16,float16,0,11.544330596923828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,fp8,0,3.4157225290934243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,fp8,0,1.3966719309488933
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,fp8,0,4.33511479695638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,fp8,0,1.45907195409139
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,48,1,128,1,float16,float16,0,4.477114677429199
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,48,48,128,1,float16,float16,0,7.19331169128418
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,48,4,128,1,float16,float16,0,12.065077463785807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,48,8,128,1,float16,float16,0,12.241748809814453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,fp8,0,29.24534861246745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,48,2,128,1,float16,float16,0,4.951770782470703
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,fp8,0,1.6765492757161458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,fp8,0,2.184602737426758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,48,4,128,1,float16,float16,0,4.599786758422852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,48,48,128,1,float16,float16,0,3.5808159510294595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,fp8,0,0.6369386514027914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,48,8,128,1,float16,float16,0,5.842453638712565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,fp8,0,13.743125915527344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,48,1,128,1,float16,float16,0,2.2335893313090005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,fp8,0,0.7265386581420898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,48,4,128,1,float16,float16,0,2.332207997639974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,fp8,0,0.8375893433888754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,48,2,128,1,float16,float16,0,2.3468480110168457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,fp8,0,1.1289652983347576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,fp8,0,0.31249600648880005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,48,1,128,1,float16,float16,0,1.1826399962107341
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,48,8,128,1,float16,float16,0,2.4579572677612305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,48,48,128,1,float16,float16,0,1.8310880661010742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,fp8,0,0.3226026693979899
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,48,2,128,1,float16,float16,0,1.1611680189768474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,fp8,0,6.494821548461914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,fp8,0,0.4094826777776082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,48,4,128,1,float16,float16,0,1.2169280052185059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,fp8,0,0.4888586600621541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,48,8,128,1,float16,float16,0,1.2542346318562825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,fp8,0,3.1155945460001626
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,48,48,128,1,float16,float16,0,0.884389321009318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,fp8,0,0.04345066845417023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,48,1,128,1,float16,float16,0,0.6032906770706177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,fp8,0,0.05004266897837321
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,48,2,128,1,float16,float16,0,0.6286880175272623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,48,4,128,1,float16,float16,0,0.6513973474502563
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,48,8,128,1,float16,float16,0,0.6378026803334554
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,fp8,0,0.08044266700744629
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,fp8,0,0.18125865856806436
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,48,1,128,1,float16,float16,0,0.3264639973640442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,fp8,0,0.028229333460330963
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,48,48,128,1,float16,float16,0,0.4589066505432129
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,48,2,128,1,float16,float16,0,0.3178933262825012
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,fp8,0,0.03276266654332479
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,48,4,128,1,float16,float16,0,0.32051199674606323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,fp8,0,0.05022933085759481
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,fp8,0,1.5760800043741863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,fp8,0,0.09179199735323589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,48,8,128,1,float16,float16,0,0.32285867134730023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,fp8,0,0.8102186520894369
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,fp8,0,5.018501281738281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,fp8,0,4.107194582621257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,fp8,0,4.090496063232422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,fp8,0,6.3830718994140625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,fp8,0,1.9519413312276204
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,48,4,128,1,float16,float16,0,15.350699106852213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,48,1,128,1,float16,float16,0,13.814239501953125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,48,2,128,1,float16,float16,0,14.21938705444336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,48,8,128,1,float16,float16,0,15.393333435058594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,48,1,128,1,float16,float16,0,6.236325581868489
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,fp8,0,2.055573304494222
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,48,48,128,1,float16,float16,0,9.43835703531901
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,fp8,0,2.39630397160848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,fp8,0,3.0968478520711265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,48,2,128,1,float16,float16,0,6.631541570027669
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,fp8,0,0.9719253381093343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,48,8,128,1,float16,float16,0,6.346549352010091
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,48,1,128,1,float16,float16,0,2.6265014012654624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,48,48,128,1,float16,float16,0,4.548682530721028
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,fp8,0,1.0323839982350667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,48,2,128,1,float16,float16,0,2.9298454920450845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,48,4,128,1,float16,float16,0,6.316383997599284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,fp8,0,1.1588266690572102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,fp8,0,1.5437866846720378
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,48,4,128,1,float16,float16,0,2.7866080602010093
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,48,1,128,1,float16,float16,0,1.3385066986083984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,48,8,128,1,float16,float16,0,2.9526294072469077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,fp8,0,0.47464533646901447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,fp8,0,0.4972960154215495
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,fp8,0,16.06710942586263
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,48,48,128,1,float16,float16,0,2.304778734842936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,fp8,0,7.88588269551595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,fp8,0,0.5575146675109863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,48,2,128,1,float16,float16,0,1.3948480288187664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,48,4,128,1,float16,float16,0,1.456117312113444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,48,8,128,1,float16,float16,0,1.4674293200174968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,fp8,0,3.9937868118286133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,fp8,0,0.0860586663087209
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,fp8,0,0.12116266290346782
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,fp8,0,0.7838773727416992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,48,1,128,1,float16,float16,0,0.7199733257293701
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,48,48,128,1,float16,float16,0,1.1748747030893962
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,fp8,0,1.9937866528828938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,fp8,0,0.32763733466466266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,fp8,0,0.2176533341407776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,48,2,128,1,float16,float16,0,0.7321279843648275
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,48,8,128,1,float16,float16,0,0.7558240095774332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,fp8,0,0.9356106917063395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,fp8,0,0.029898665845394135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,48,48,128,1,float16,float16,0,0.5686346689860026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,48,4,128,1,float16,float16,0,0.7219306627909342
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,48,1,128,1,float16,float16,0,0.36028265953063965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,fp8,0,0.036506667733192444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,fp8,0,0.061834668119748436
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,48,2,128,1,float16,float16,0,0.3528746763865153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,48,4,128,1,float16,float16,0,0.36046934127807617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,48,8,128,1,float16,float16,0,0.37908267974853516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,48,1,128,1,float16,float16,0,0.18787733713785806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,48,48,128,1,float16,float16,0,0.27963199218114215
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,48,2,128,1,float16,float16,0,0.19086400667826334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,fp8,0,0.026101333399613697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,fp8,0,0.021749332547187805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,fp8,0,0.11703999837239583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,48,4,128,1,float16,float16,0,0.19561066230138144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,48,8,128,1,float16,float16,0,0.1943839987119039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,fp8,0,0.03732266773780187
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,fp8,0,0.06971199810504913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,fp8,0,0.42978668212890625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,fp8,0,5.107824007670085
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,fp8,0,5.468736012776692
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,fp8,0,6.242021560668945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,48,8,128,1,float16,float16,0,14.971397399902344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,48,1,128,1,float16,float16,0,14.978533426920572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,48,2,128,1,float16,float16,0,14.156346638997396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,fp8,0,2.5855040550231934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,fp8,0,7.905951817830403
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,fp8,0,2.7511040369669595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,48,48,128,1,float16,float16,0,10.030991872151693
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,48,4,128,1,float16,float16,0,15.499200185139975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,48,2,128,1,float16,float16,0,6.132474899291992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,fp8,0,3.149850527445475
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,48,1,128,1,float16,float16,0,5.450101216634114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,48,4,128,1,float16,float16,0,6.1005064646403
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,fp8,0,4.202927907307942
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,48,8,128,1,float16,float16,0,6.346944173177083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,fp8,0,1.300106684366862
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,fp8,0,1.4705546696980794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,48,1,128,1,float16,float16,0,2.5392373402913413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,48,4,128,1,float16,float16,0,2.7329705556233725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,48,48,128,1,float16,float16,0,5.042453447977702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,48,2,128,1,float16,float16,0,2.600272019704183
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,fp8,0,1.5636852582295735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,fp8,0,8.203850428263346
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,fp8,0,1.9712427457173665
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,48,48,128,1,float16,float16,0,2.525424003601074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,48,1,128,1,float16,float16,0,1.3119839827219646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,48,8,128,1,float16,float16,0,2.9426933924357095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,fp8,0,0.6392319997151693
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,48,2,128,1,float16,float16,0,1.4850880304972331
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,fp8,0,0.7241173585255941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,fp8,0,4.2952321370442705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,48,4,128,1,float16,float16,0,1.390501340230306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,fp8,0,17.010906219482422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,fp8,0,0.747327963511149
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,48,8,128,1,float16,float16,0,1.4666773478190105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,fp8,0,1.0258346398671467
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,fp8,0,0.3211093346277873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,48,1,128,1,float16,float16,0,0.6865173180898031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,48,2,128,1,float16,float16,0,0.7071839968363444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,fp8,0,0.30293865998586017
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,48,48,128,1,float16,float16,0,1.3023626804351807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,fp8,0,0.3688160181045532
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,48,4,128,1,float16,float16,0,0.717583974202474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,fp8,0,0.44677865505218506
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,48,8,128,1,float16,float16,0,0.7870826721191406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,48,48,128,1,float16,float16,0,0.6458666721979777
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,48,2,128,1,float16,float16,0,0.3708266814549764
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,fp8,0,2.1472479502360025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,fp8,0,0.03707200040419897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,fp8,0,0.044863998889923096
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,48,8,128,1,float16,float16,0,0.3943413496017456
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,48,1,128,1,float16,float16,0,0.35600535074869794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,fp8,0,0.9887093702952067
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,48,4,128,1,float16,float16,0,0.39585065841674805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,fp8,0,0.13699199755986533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,fp8,0,0.06877866884072621
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,48,2,128,1,float16,float16,0,0.17625067631403604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,fp8,0,0.028938665986061096
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,fp8,0,0.02439466615517934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,48,1,128,1,float16,float16,0,0.17563732465108237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,48,48,128,1,float16,float16,0,0.3224266568819682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,48,4,128,1,float16,float16,0,0.18645866711934408
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,fp8,0,0.04053333401679993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,48,8,128,1,float16,float16,0,0.18089600404103598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,fp8,0,0.06158400078614553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,48,2,128,1,float16,float16,0,0.09859200318654378
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,fp8,0,0.10515200098355611
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,48,1,128,1,float16,float16,0,0.09714133540789287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,fp8,0,0.49622400601704914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,48,48,128,1,float16,float16,0,0.10920533537864685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,48,4,128,1,float16,float16,0,0.10178666313489278
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,fp8,0,0.10852266351381938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,fp8,0,0.11623467008272807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,48,8,128,1,float16,float16,0,0.10205866893132527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,fp8,0,0.13401066263516745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,fp8,0,0.315557340780894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,48,1,128,1,float16,float16,0,9.305472056070963
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,fp8,0,5.486565272013347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,fp8,0,5.481333414713542
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,48,2,128,1,float16,float16,0,9.372389475504557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,48,1,128,1,float16,float16,0,3.2844692866007485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,fp8,0,6.271930694580078
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,48,4,128,1,float16,float16,0,10.091503779093424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,fp8,0,2.5477652549743652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,fp8,0,7.926037470499675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,48,48,128,1,float16,float16,0,9.668986638387045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,fp8,0,12.151781717936197
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,48,8,128,1,float16,float16,0,10.81656010945638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,fp8,0,2.727269490559896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,48,2,128,1,float16,float16,0,3.346533457438151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,48,4,128,1,float16,float16,0,3.699178695678711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,fp8,0,3.1754347483317056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,48,1,128,1,float16,float16,0,1.608357270558675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,fp8,0,1.2770666281382244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,48,8,128,1,float16,float16,0,4.413536071777344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,fp8,0,1.3629706700642903
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,fp8,0,6.330656051635742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,fp8,0,1.540826638539632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,fp8,0,4.210463841756185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,48,4,128,1,float16,float16,0,1.8374667167663574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,48,2,128,1,float16,float16,0,1.699984073638916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,fp8,0,1.9666879971822102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,48,48,128,1,float16,float16,0,2.621386686960856
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,48,8,128,1,float16,float16,0,2.189477284749349
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,fp8,0,3.138271967569987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,fp8,0,0.6393226782480875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,48,48,128,1,float16,float16,0,4.815744082132976
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,48,2,128,1,float16,float16,0,0.8525760173797607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,48,4,128,1,float16,float16,0,0.9387786388397217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,fp8,0,0.6722613175710043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,48,48,128,1,float16,float16,0,1.3141120274861653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,48,8,128,1,float16,float16,0,1.0215466817220051
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,fp8,0,0.9505333105723063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,fp8,0,0.7527466615041097
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,fp8,0,1.5746879577636719
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,fp8,0,0.31547733147939044
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,48,2,128,1,float16,float16,0,0.4508693218231201
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,fp8,0,0.3184373378753662
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,48,1,128,1,float16,float16,0,0.4280213514963786
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,fp8,0,0.33903467655181885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,48,1,128,1,float16,float16,0,0.2220426599184672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,fp8,0,0.03696000079313914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,48,4,128,1,float16,float16,0,0.47434135278066
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,48,48,128,1,float16,float16,0,0.641541322072347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,fp8,0,0.4487839937210083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,48,1,128,1,float16,float16,0,0.8225226402282715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,48,8,128,1,float16,float16,0,0.5113813479741415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,fp8,0,0.04166933397452036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,48,2,128,1,float16,float16,0,0.23267734050750732
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,48,4,128,1,float16,float16,0,0.25059733788172406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,fp8,0,0.6921439965566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,fp8,0,0.10255466898282369
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,fp8,0,0.023989332218964893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,fp8,0,0.06537599861621857
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,48,2,128,1,float16,float16,0,0.10831466317176819
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,48,1,128,1,float16,float16,0,0.10594666997591655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,48,8,128,1,float16,float16,0,0.2633226712544759
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,fp8,0,0.33825600147247314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,48,4,128,1,float16,float16,0,0.1123306651910146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,fp8,0,0.02603200078010559
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,48,8,128,1,float16,float16,0,0.11292800307273865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,fp8,0,0.036202666660149894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,48,48,128,1,float16,float16,0,0.2531893253326416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,fp8,0,0.04534400006135305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,48,2,128,1,float16,float16,0,0.0622026671965917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,fp8,0,0.10475200414657593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,48,1,128,1,float16,float16,0,0.06148266792297363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,fp8,0,0.10588266452153523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,48,48,128,1,float16,float16,0,0.07397866745789845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,48,8,128,1,float16,float16,0,0.06412266691525777
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,48,4,128,1,float16,float16,0,0.06690133114655812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,fp8,0,0.11223999659220378
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,fp8,0,0.12313066919644673
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,48,48,128,1,float16,float16,0,0.03957866628964742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,fp8,0,0.06025599936644236
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,fp8,0,0.24526933828989664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,fp8,0,0.059802666306495667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,48,1,128,1,float16,float16,0,0.03444266567627589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,48,2,128,1,float16,float16,0,0.03505066782236099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,48,4,128,1,float16,float16,0,0.03581333408753077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,fp8,0,0.06516799827416737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,48,8,128,1,float16,float16,0,0.036090667049090065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,fp8,0,0.0689333329598109
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,fp8,0,0.128629336754481
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,48,1,128,1,float16,float16,0,2.9589598973592124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,fp8,0,2.74124813079834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,48,2,128,1,float16,float16,0,3.167626698811849
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,48,4,128,1,float16,float16,0,3.4748481114705405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,fp8,0,3.1785761515299478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,fp8,0,2.739269256591797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,48,1,128,1,float16,float16,0,1.2995893160502117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,48,8,128,1,float16,float16,0,4.090218544006348
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,fp8,0,1.4578240712483723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,48,48,128,1,float16,float16,0,4.87555726369222
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,fp8,0,1.299295981725057
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,48,2,128,1,float16,float16,0,1.3894294102986653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,48,4,128,1,float16,float16,0,1.5376426378885906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,fp8,0,5.551584243774414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,48,1,128,1,float16,float16,0,0.6410559813181559
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,48,48,128,1,float16,float16,0,2.4560426076253257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,fp8,0,4.271871884663899
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,fp8,0,1.5747520128885906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,fp8,0,1.9956693649291992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,48,2,128,1,float16,float16,0,0.6837226549784342
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,48,8,128,1,float16,float16,0,2.0078879992167153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,fp8,0,0.6838666598002116
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,48,4,128,1,float16,float16,0,0.7689546744028727
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,fp8,0,2.6713441212972007
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,fp8,0,0.7497386932373047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,48,8,128,1,float16,float16,0,0.9039039611816406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,48,1,128,1,float16,float16,0,0.3415466547012329
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,48,48,128,1,float16,float16,0,1.306063969930013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,fp8,0,0.2965386708577474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,fp8,0,0.34651732444763184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,fp8,0,1.23909330368042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,fp8,0,0.9573866526285807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,48,2,128,1,float16,float16,0,0.34039998054504395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,fp8,0,0.6770400206247965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,fp8,0,0.341045339902242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,fp8,0,0.03696000079313914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,48,4,128,1,float16,float16,0,0.396565318107605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,48,1,128,1,float16,float16,0,0.15779733657836914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,48,2,128,1,float16,float16,0,0.1694613297780355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,48,8,128,1,float16,float16,0,0.4890880187352498
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,fp8,0,0.4456373453140259
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,fp8,0,0.5394399960835775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,fp8,0,0.04274666806062063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,fp8,0,0.05412266651789347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,48,4,128,1,float16,float16,0,0.1876373291015625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,48,48,128,1,float16,float16,0,0.22166399161020914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,48,1,128,1,float16,float16,0,0.075013334552447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,fp8,0,0.024319998919963837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,48,8,128,1,float16,float16,0,0.20642666021982828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,48,2,128,1,float16,float16,0,0.07771199941635132
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,fp8,0,0.0841919978459676
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,48,48,128,1,float16,float16,0,0.5991040070851644
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,fp8,0,0.02568000058333079
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,48,4,128,1,float16,float16,0,0.08155733346939087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,fp8,0,0.263973335425059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,48,1,128,1,float16,float16,0,0.04038399954636892
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,fp8,0,0.041189332803090416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,fp8,0,0.03357866654793421
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,fp8,0,0.10531733433405559
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,48,48,128,1,float16,float16,0,0.05096533397833506
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,48,2,128,1,float16,float16,0,0.04159466673930486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,48,8,128,1,float16,float16,0,0.08196266492207845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,fp8,0,0.1055626670519511
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,48,4,128,1,float16,float16,0,0.043738668163617454
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,fp8,0,0.10896533727645874
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,fp8,0,0.1912053426106771
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,48,2,128,1,float16,float16,0,0.024608001112937927
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,fp8,0,0.06000000238418579
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,fp8,0,0.0601440022389094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,48,48,128,1,float16,float16,0,0.03105599929889043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,48,1,128,1,float16,float16,0,0.025413334369659424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,48,8,128,1,float16,float16,0,0.04628799855709076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,48,4,128,1,float16,float16,0,0.02568000058333079
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,fp8,0,0.11724799871444702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,fp8,0,0.06124266485373179
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,48,1,128,1,float16,float16,0,0.016693333784739178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,fp8,0,0.036720000207424164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,48,8,128,1,float16,float16,0,0.026357332865397137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,48,2,128,1,float16,float16,0,0.017152000218629837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,fp8,0,0.03756800045569738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,48,48,128,1,float16,float16,0,0.01985599969824155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,fp8,0,0.06620266536871593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,fp8,0,0.10061867038408916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,48,4,128,1,float16,float16,0,0.016832000265518825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,48,8,128,1,float16,float16,0,0.017344000438849132
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,fp8,0,0.03731200098991394
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,fp8,0,0.05917333563168844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,fp8,0,0.04091199984153112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,fp8,0,1.3583839734395344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,48,1,128,1,float16,float16,0,1.4450613657633464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,fp8,0,1.3702826499938965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,48,2,128,1,float16,float16,0,1.558341344197591
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,48,4,128,1,float16,float16,0,1.6411573092142742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,fp8,0,1.5851732889811199
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,fp8,0,2.013264020284017
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,48,1,128,1,float16,float16,0,0.7124640146891276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,48,2,128,1,float16,float16,0,0.7163733641306559
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,48,8,128,1,float16,float16,0,1.9693546295166016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,48,48,128,1,float16,float16,0,2.4309867223103843
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,fp8,0,0.6781439781188965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,fp8,0,0.6901013056437174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,fp8,0,2.484058698018392
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,48,4,128,1,float16,float16,0,0.7930933634440104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,48,1,128,1,float16,float16,0,0.3574719826380412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,48,48,128,1,float16,float16,0,1.2219626903533936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,fp8,0,0.32712533076604206
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,fp8,0,0.8218239943186442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,48,8,128,1,float16,float16,0,0.9586613178253174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,48,2,128,1,float16,float16,0,0.34988800684611004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,fp8,0,1.1644906997680664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,fp8,0,0.9691946506500244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,fp8,0,0.3503893216451009
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,48,48,128,1,float16,float16,0,0.6005973418553671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,48,4,128,1,float16,float16,0,0.40619198481241864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,48,8,128,1,float16,float16,0,0.5064693291982015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,fp8,0,0.3736799955368042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,fp8,0,0.037861332297325134
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,fp8,0,0.41231465339660645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,48,2,128,1,float16,float16,0,0.1355839967727661
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,fp8,0,0.4705173174540202
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,fp8,0,0.04264533519744873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,fp8,0,0.057274664441744484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,48,8,128,1,float16,float16,0,0.18531733751296997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,48,48,128,1,float16,float16,0,0.21585599581400552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,48,4,128,1,float16,float16,0,0.17599467436472574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,fp8,0,0.07625600198904674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,48,1,128,1,float16,float16,0,0.12533866365750632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,fp8,0,0.024058667321999867
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,48,1,128,1,float16,float16,0,0.05858666698137919
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,48,4,128,1,float16,float16,0,0.06471999982992808
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,fp8,0,0.026144000391165417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,48,2,128,1,float16,float16,0,0.060122668743133545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,fp8,0,0.03349866718053818
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,48,8,128,1,float16,float16,0,0.06436266501744588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,48,48,128,1,float16,float16,0,0.04251733422279358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,fp8,0,0.038165333370367684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,48,2,128,1,float16,float16,0,0.03370666752258936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,48,1,128,1,float16,float16,0,0.032730666299661
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,fp8,0,0.21811733643213907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,fp8,0,0.018229333062966663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,fp8,0,0.019061333189407986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,fp8,0,0.022069332500298817
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,48,4,128,1,float16,float16,0,0.03621333340803782
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,48,48,128,1,float16,float16,0,0.024351999163627625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,48,8,128,1,float16,float16,0,0.035877334574858345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,48,1,128,1,float16,float16,0,0.01939733326435089
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,fp8,0,0.023738667368888855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,48,4,128,1,float16,float16,0,0.0206133338312308
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,fp8,0,0.01551466683546702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,48,2,128,1,float16,float16,0,0.019573333362738293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,fp8,0,0.08090666433175404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,fp8,0,0.01551466683546702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,fp8,0,0.041120000183582306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,48,8,128,1,float16,float16,0,0.020901332298914593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,fp8,0,0.016858667135238647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,fp8,0,0.013861333330472311
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,48,48,128,1,float16,float16,0,0.01613866661985715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,48,1,128,1,float16,float16,0,0.013663999736309052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,fp8,0,0.017397332936525345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,48,2,128,1,float16,float16,0,0.013701333353916803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,48,4,128,1,float16,float16,0,0.014080000420411428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,fp8,0,0.014106666048367819
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,48,8,128,1,float16,float16,0,0.013925333817799887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,48,48,128,1,float16,float16,0,0.010255999863147736
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,fp8,0,0.014197333405415217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,48,1,128,1,float16,float16,0,0.009349333122372627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,48,2,128,1,float16,float16,0,0.009279999881982803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,fp8,0,0.014101333916187286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,fp8,0,0.026693334182103474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,48,4,128,1,float16,float16,0,0.009381333366036415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,48,8,128,1,float16,float16,0,0.009423999736706415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,fp8,0,0.019248000035683315
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,fp8,0,0.013461332768201828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,fp8,0,0.7525013287862142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,fp8,0,0.6450186570485433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,fp8,0,0.6805386543273926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,48,4,128,1,float16,float16,0,0.7932480176289877
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,48,1,128,1,float16,float16,0,0.6852320035298666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,48,2,128,1,float16,float16,0,0.7871253490447998
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,48,8,128,1,float16,float16,0,1.0414613087972004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,48,1,128,1,float16,float16,0,0.33472001552581787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,fp8,0,0.30595733722050983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,48,2,128,1,float16,float16,0,0.35441601276397705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,48,48,128,1,float16,float16,0,1.2319413026173909
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,fp8,0,0.9688373406728109
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,fp8,0,1.1817386945088704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,fp8,0,0.34279998143513996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,48,8,128,1,float16,float16,0,0.4774986505508423
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,fp8,0,0.40547200043996173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,48,4,128,1,float16,float16,0,0.38264532883961994
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,fp8,0,0.03676266719897588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,fp8,0,0.31889599561691284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,fp8,0,0.04164800047874451
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,48,2,128,1,float16,float16,0,0.1279306709766388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,fp8,0,0.05776533484458923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,48,48,128,1,float16,float16,0,0.5968480110168457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,48,4,128,1,float16,float16,0,0.16116799910863241
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,fp8,0,0.49910934766133624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,48,8,128,1,float16,float16,0,0.1827039917310079
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,fp8,0,0.08816533287366231
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,48,48,128,1,float16,float16,0,0.21986132860183716
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,fp8,0,0.02407466620206833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,fp8,0,0.025834667185942333
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,48,4,128,1,float16,float16,0,0.05966933568318685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,fp8,0,0.17974400520324707
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,48,8,128,1,float16,float16,0,0.06039999922116598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,fp8,0,0.037632000943024956
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,48,48,128,1,float16,float16,0,0.03985599925120672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,fp8,0,0.018063999712467194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,fp8,0,0.03340800106525421
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,fp8,0,0.018895999838908512
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,48,1,128,1,float16,float16,0,0.03070933371782303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,fp8,0,0.05579733351866404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,fp8,0,0.022128000855445862
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,48,8,128,1,float16,float16,0,0.033615998923778534
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,48,4,128,1,float16,float16,0,0.033770665526390076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,48,2,128,1,float16,float16,0,0.03140799949566523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,48,48,128,1,float16,float16,0,0.022570667167504627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,fp8,0,0.02362666775782903
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,48,1,128,1,float16,float16,0,0.05448000133037567
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,48,1,128,1,float16,float16,0,0.0180479995906353
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,48,2,128,1,float16,float16,0,0.055813332398732506
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,48,2,128,1,float16,float16,0,0.019424000134070713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,fp8,0,0.015237333873907724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,48,1,128,1,float16,float16,0,0.12653866410255432
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,48,8,128,1,float16,float16,0,0.01940800001223882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,48,4,128,1,float16,float16,0,0.019082666685183842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,fp8,0,0.017237332959969837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,fp8,0,0.015498666713635126
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,fp8,0,0.013786666095256805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,fp8,0,0.03400533397992452
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,fp8,0,0.01674666628241539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,48,1,128,1,float16,float16,0,0.01301866645614306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,48,2,128,1,float16,float16,0,0.013125333935022354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,fp8,0,0.013818666338920593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,fp8,0,0.020256000260512035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,fp8,0,0.014053333550691605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,48,8,128,1,float16,float16,0,0.01328533391157786
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,48,48,128,1,float16,float16,0,0.009999999776482582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,48,4,128,1,float16,float16,0,0.013301332791646322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,48,48,128,1,float16,float16,0,0.015141333142916361
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,48,2,128,1,float16,float16,0,0.008890666688481966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,48,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,fp8,0,0.014096000542243322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,fp8,0,0.01309866706530253
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,48,8,128,1,float16,float16,0,0.009056000038981438
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,fp8,0,0.015397333850463232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,48,4,128,1,float16,float16,0,0.009061333412925402
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,48,48,128,1,float16,float16,0,0.009103999783595404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,48,1,128,1,float16,float16,0,0.00847999999920527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,fp8,0,0.013349333157142004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,fp8,0,0.012597333639860153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,48,2,128,1,float16,float16,0,0.0085333331177632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,fp8,0,0.012757333616415659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,48,4,128,1,float16,float16,0,0.008757333581646284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,48,8,128,1,float16,float16,0,0.008757333581646284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,fp8,0,0.014720000326633453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,48,8,128,1,float16,float16,0,0.48844265937805176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,48,1,128,1,float16,float16,0,0.3904373248418172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,fp8,0,0.35420799255371094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,fp8,0,0.3493280013402303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,48,2,128,1,float16,float16,0,0.4241600036621094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,48,4,128,1,float16,float16,0,0.4437493483225505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,fp8,0,0.37617067495981854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,48,48,128,1,float16,float16,0,0.5876479943593343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,48,1,128,1,float16,float16,0,0.19156267245610556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,fp8,0,0.04744000236193339
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,fp8,0,0.041509332756201424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,fp8,0,0.5897333224614462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,48,4,128,1,float16,float16,0,0.22065067291259766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,48,2,128,1,float16,float16,0,0.20119466384251913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,fp8,0,0.07276799778143565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,48,8,128,1,float16,float16,0,0.23749866088231406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,48,1,128,1,float16,float16,0,0.09240532914797465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,48,48,128,1,float16,float16,0,0.2413813273111979
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,48,2,128,1,float16,float16,0,0.09403733412424724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,fp8,0,0.13271466890970865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,fp8,0,0.030117332935333252
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,fp8,0,0.028170667588710785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,48,4,128,1,float16,float16,0,0.09764800469080608
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,fp8,0,0.4869653383890788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,48,48,128,1,float16,float16,0,0.0576853354771932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,48,8,128,1,float16,float16,0,0.09827733039855957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,48,1,128,1,float16,float16,0,0.04868266483147939
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,fp8,0,0.052442664901415505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,fp8,0,0.02276800076166789
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,48,4,128,1,float16,float16,0,0.05097066859404246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,fp8,0,0.03819733361403147
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,fp8,0,0.026133333643277485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,48,2,128,1,float16,float16,0,0.048954665660858154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,48,8,128,1,float16,float16,0,0.05105599761009216
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,fp8,0,0.09429333607355754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,fp8,0,0.27565866708755493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,fp8,0,0.028666667640209198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,48,48,128,1,float16,float16,0,0.03262399882078171
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,48,1,128,1,float16,float16,0,0.027957332630952198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,fp8,0,0.01982933282852173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,48,4,128,1,float16,float16,0,0.028912000358104706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,fp8,0,0.021194666624069214
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,fp8,0,0.05233600238958994
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,48,8,128,1,float16,float16,0,0.02920000006755193
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,fp8,0,0.021770666042963665
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,48,48,128,1,float16,float16,0,0.01838933303952217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,fp8,0,0.02046400060256322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,48,2,128,1,float16,float16,0,0.027786667148272198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,48,4,128,1,float16,float16,0,0.016688000410795212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,fp8,0,0.018383999665578205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,fp8,0,0.01836799954374631
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,48,2,128,1,float16,float16,0,0.016250666230916977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,48,1,128,1,float16,float16,0,0.016282666474580765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,fp8,0,0.034202667574087776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,fp8,0,0.017808000246683758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,48,8,128,1,float16,float16,0,0.01643199970324834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,48,48,128,1,float16,float16,0,0.013189333180586496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,48,1,128,1,float16,float16,0,0.012063999970753988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,fp8,0,0.017978666971127193
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,fp8,0,0.019109333554903667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,fp8,0,0.01757866640885671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,48,4,128,1,float16,float16,0,0.012080000092585882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,48,2,128,1,float16,float16,0,0.012063999970753988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,48,8,128,1,float16,float16,0,0.012213333199421564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,fp8,0,0.017642666896184284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,48,48,128,1,float16,float16,0,0.00895999992887179
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,fp8,0,0.017509333789348602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,fp8,0,0.016943999876578648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,48,1,128,1,float16,float16,0,0.008373333141207695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,48,2,128,1,float16,float16,0,0.008314666648705801
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,fp8,0,0.017317333569129307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,fp8,0,0.017263999829689663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,fp8,0,0.02035733312368393
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,48,8,128,1,float16,float16,0,0.008378666515151659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,48,4,128,1,float16,float16,0,0.00843733362853527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,fp8,0,0.017466666797796886
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,fp8,0,0.017530667285124462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,48,48,128,1,float16,float16,0,0.008869333192706108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,48,1,128,1,float16,float16,0,0.007967999825874964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,fp8,0,0.016693333784739178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,48,2,128,1,float16,float16,0,0.008101333553592363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,fp8,0,0.018789333601792652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,fp8,0,0.016970666746298473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,48,4,128,1,float16,float16,0,0.008127999802430471
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,48,8,128,1,float16,float16,0,0.00814933329820633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,fp8,0,0.018325333793958027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,48,2,128,1,float16,float16,0,0.36084266503651935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,fp8,0,0.04387733340263367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,fp8,0,0.06260799864927928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,48,1,128,1,float16,float16,0,0.35374399026234943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,48,4,128,1,float16,float16,0,0.3794026772181193
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,fp8,0,0.09823999802271526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,48,8,128,1,float16,float16,0,0.39316264788309735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,48,48,128,1,float16,float16,0,0.31116267045338947
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,48,2,128,1,float16,float16,0,0.17036267121632895
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,fp8,0,0.2057173252105713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,48,1,128,1,float16,float16,0,0.16996800899505615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,fp8,0,0.028538666665554047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,fp8,0,0.37969064712524414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,48,4,128,1,float16,float16,0,0.17486933867136636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,fp8,0,0.03226666649182638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,fp8,0,0.05154666801293691
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,48,8,128,1,float16,float16,0,0.17536000410715738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,48,48,128,1,float16,float16,0,0.09637332955996196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,fp8,0,0.07633600135644276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,fp8,0,0.021701333423455555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,fp8,0,0.0232640008131663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,48,1,128,1,float16,float16,0,0.08675199747085571
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,48,2,128,1,float16,float16,0,0.08762666583061218
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,48,4,128,1,float16,float16,0,0.08921600381533305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,fp8,0,0.018789333601792652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,fp8,0,0.041519999504089355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,48,48,128,1,float16,float16,0,0.05026666820049286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,48,8,128,1,float16,float16,0,0.08945600191752116
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,48,1,128,1,float16,float16,0,0.04565866788228353
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,fp8,0,0.028058665494124096
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,fp8,0,0.15003732840220133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,48,2,128,1,float16,float16,0,0.04548266530036926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,fp8,0,0.019002666076024372
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,48,8,128,1,float16,float16,0,0.04660800099372864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,fp8,0,0.02085866779088974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,48,4,128,1,float16,float16,0,0.04635733366012573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,fp8,0,0.08091199894746144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,fp8,0,0.023605334262053173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,48,1,128,1,float16,float16,0,0.026586666703224182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,48,48,128,1,float16,float16,0,0.02863466739654541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,48,2,128,1,float16,float16,0,0.02638400097688039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,fp8,0,0.018229333062966663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,fp8,0,0.04456533491611481
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,48,8,128,1,float16,float16,0,0.026661333938439686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,fp8,0,0.01759999990463257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,fp8,0,0.017765333255132038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,48,4,128,1,float16,float16,0,0.026565333207448322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,48,48,128,1,float16,float16,0,0.016549333930015564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,fp8,0,0.017557332913080852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,48,1,128,1,float16,float16,0,0.015466666469971338
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,48,2,128,1,float16,float16,0,0.015413332730531693
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,fp8,0,0.017551999539136887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,fp8,0,0.01841066653529803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,48,4,128,1,float16,float16,0,0.015589332828919092
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,fp8,0,0.01676799977819125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,48,8,128,1,float16,float16,0,0.01563199982047081
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,48,48,128,1,float16,float16,0,0.012181332955757776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,48,1,128,1,float16,float16,0,0.011509332805871964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,fp8,0,0.01724799970785777
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,fp8,0,0.016965333372354507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,fp8,0,0.01732800031701724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,48,4,128,1,float16,float16,0,0.011616000284751257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,48,2,128,1,float16,float16,0,0.011621333658695221
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,fp8,0,0.03156266609827677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,fp8,0,0.01743999992807706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,48,8,128,1,float16,float16,0,0.011541333049535751
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,48,48,128,1,float16,float16,0,0.008687999720374743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,fp8,0,0.016645333419243496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,48,1,128,1,float16,float16,0,0.00808533343176047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,fp8,0,0.019839999576409657
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,48,2,128,1,float16,float16,0,0.007983999947706858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,48,4,128,1,float16,float16,0,0.008176000167926153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,fp8,0,0.018533332894245785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,48,1,128,1,float16,float16,0,0.007978666573762894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,fp8,0,0.01725333308180173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,48,8,128,1,float16,float16,0,0.008101333553592363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,48,48,128,1,float16,float16,0,0.008410666758815447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,48,2,128,1,float16,float16,0,0.007893333211541176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,fp8,0,0.016544000556071598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,fp8,0,0.016575999557971954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,48,4,128,1,float16,float16,0,0.00810666692753633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,48,8,128,1,float16,float16,0,0.007946666950980822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,fp8,0,0.016970666746298473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,fp8,0,0.018277333428462345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,48,1,128,1,float16,float16,0,0.014906667172908783
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,48,1,128,1,float16,fp8,0,0.01882133384545644
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,48,2,128,1,float16,float16,0,0.028991999725500744
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,48,8,128,1,float16,float16,0,0.06736533343791962
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,0,0.03681600093841553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,48,4,128,1,float16,float16,0,0.043023998538653054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,48,2,128,1,float16,fp8,0,0.02497066557407379
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,48,1,128,1,float16,float16,0,0.010079999764760336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,48,48,128,1,float16,float16,0,0.15414933363596597
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,48,1,128,1,float16,fp8,0,0.014405333747466406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,48,2,128,1,float16,fp8,0,0.0163680004576842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,48,2,128,1,float16,float16,0,0.01893866683046023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,0,0.056074668963750206
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,48,4,128,1,float16,float16,0,0.02586666742960612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,fp8,0,0.13385066390037537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,48,8,128,1,float16,float16,0,0.03959999978542328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,0,0.033733333150545754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,0,0.02236266682545344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,48,48,128,1,float16,float16,0,0.07981866598129272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,48,1,128,1,float16,float16,0,0.00961599995692571
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,48,1,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,48,2,128,1,float16,float16,0,0.014080000420411428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,48,2,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,48,8,128,1,float16,float16,0,0.02478400121132533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,48,4,128,1,float16,float16,0,0.017658667018016178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,0,0.015146666516860327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,48,48,128,1,float16,float16,0,0.042394667863845825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,fp8,0,0.07283733288447063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,0,0.021301334102948506
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,48,1,128,1,float16,float16,0,0.00938666673998038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,48,1,128,1,float16,fp8,0,0.012266666938861212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,48,2,128,1,float16,float16,0,0.01320533330241839
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,48,2,128,1,float16,fp8,0,0.01240533341964086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,fp8,0,0.042170668641726174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,48,8,128,1,float16,float16,0,0.016965333372354507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,48,4,128,1,float16,float16,0,0.013434667140245438
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,0,0.01249066616098086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,48,1,128,1,float16,float16,0,0.009216000015536943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,48,48,128,1,float16,float16,0,0.025274666647116344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,0,0.0144213338692983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,48,2,128,1,float16,float16,0,0.012666666259368261
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,48,1,128,1,float16,fp8,0,0.011503999431928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,48,4,128,1,float16,float16,0,0.012885333349307379
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,0,0.011946666985750198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,48,2,128,1,float16,fp8,0,0.011711999773979187
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,48,8,128,1,float16,float16,0,0.012928000340859095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,48,48,128,1,float16,float16,0,0.01498666654030482
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,48,1,128,1,float16,fp8,0,0.011530666301647821
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,0,0.013760000467300415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,48,1,128,1,float16,float16,0,0.00914666677514712
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,48,2,128,1,float16,float16,0,0.012517333030700684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,48,4,128,1,float16,float16,0,0.012746666868527731
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,48,2,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,fp8,0,0.028064000109831493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,0,0.011567999919255575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,48,8,128,1,float16,float16,0,0.012831999609867731
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,fp8,0,0.02176533391078313
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,48,1,128,1,float16,float16,0,0.008922666932145754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,0,0.013690666606028875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,48,48,128,1,float16,float16,0,0.011488000551859537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,48,2,128,1,float16,float16,0,0.012448000411192576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,48,1,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,48,2,128,1,float16,fp8,0,0.011461333682139715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,48,4,128,1,float16,float16,0,0.012400000045696894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,48,8,128,1,float16,float16,0,0.012383999923865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,fp8,0,0.016805333395799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,48,48,128,1,float16,float16,0,0.008383999889095625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,48,1,128,1,float16,float16,0,0.008863999818762144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,48,1,128,1,float16,fp8,0,0.01073066641887029
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,0,0.013178666432698568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,48,2,128,1,float16,float16,0,0.012133333832025528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,48,2,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,0,0.011306667079528173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,48,4,128,1,float16,float16,0,0.012149333953857422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,48,8,128,1,float16,float16,0,0.01221866657336553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,0,0.011472000430027643
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,48,48,128,1,float16,float16,0,0.008154666672150293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,48,1,128,1,float16,float16,0,0.008725333337982496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,48,1,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,48,2,128,1,float16,float16,0,0.00873066671192646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,48,2,128,1,float16,fp8,0,0.010661333799362183
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,fp8,0,0.01602666700879733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,48,4,128,1,float16,float16,0,0.0084906667470932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,48,8,128,1,float16,float16,0,0.008832000195980072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,fp8,0,0.015184000134468079
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,fp8,0,4.882885297139485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,fp8,0,6.051989237467448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,fp8,0,9.524794896443685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,fp8,0,27.944905598958332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,fp8,0,2.3175946871439614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,40,40,128,1,float16,float16,0,70.20937601725261
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,40,1,128,1,float16,float16,0,65.04304504394531
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,fp8,0,3.0244318644205728
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,fp8,0,5.047946612040202
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,40,2,128,1,float16,float16,0,68.88426717122395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,fp8,0,14.426138559977213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,40,1,128,1,float16,float16,0,137.095458984375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,40,2,128,1,float16,float16,0,136.72138468424478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,40,4,128,1,float16,float16,0,136.20704142252603
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,fp8,0,1.1708426475524902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,40,8,128,1,float16,float16,0,138.39335123697916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,fp8,0,1.5414719581604004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,40,4,128,1,float16,float16,0,67.9186503092448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,fp8,0,143.30048624674478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,fp8,0,2.507477283477783
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,40,40,128,1,float16,float16,0,34.95709991455078
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,40,8,128,1,float16,float16,0,68.31350199381511
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,fp8,0,6.2894134521484375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,40,1,128,1,float16,float16,0,33.62335968017578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,fp8,0,0.5969706773757935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,40,2,128,1,float16,float16,0,33.090965270996094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,fp8,0,0.8160213629404703
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,40,4,128,1,float16,float16,0,33.14892323811849
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,fp8,0,1.3457013765970867
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,40,40,128,1,float16,float16,0,19.304602305094402
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,40,1,128,1,float16,float16,0,16.35590362548828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,40,2,128,1,float16,float16,0,17.429487864176433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,fp8,0,3.3584105173746743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,40,4,128,1,float16,float16,0,16.299264272054035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,fp8,0,3.4152212142944336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,fp8,0,4.081765174865723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,40,8,128,1,float16,float16,0,36.029317220052086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,40,8,128,1,float16,float16,0,16.694085439046223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,fp8,0,6.374517440795898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,fp8,0,37.84217071533203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,fp8,0,73.78256225585938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,fp8,0,15.703994750976562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,fp8,0,1.7121973037719727
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,40,40,128,1,float16,float16,0,39.948646545410156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,fp8,0,2.1753172874450684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,40,1,128,1,float16,float16,0,37.86050160725912
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,fp8,0,3.6162986755371094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,40,2,128,1,float16,float16,0,38.33560434977213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,40,1,128,1,float16,float16,0,77.99371337890625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,fp8,0,7.954581578572591
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,40,2,128,1,float16,float16,0,79.16696166992188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,40,4,128,1,float16,float16,0,76.50656636555989
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,fp8,0,0.8891626993815104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,40,8,128,1,float16,float16,0,78.68226114908855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,fp8,0,1.1188373565673828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,40,40,128,1,float16,float16,0,19.931371053059895
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,fp8,0,1.7131627400716145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,40,1,128,1,float16,float16,0,20.759440104166668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,40,4,128,1,float16,float16,0,39.63140360514323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,40,2,128,1,float16,float16,0,18.847984313964844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,fp8,0,85.18208312988281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,40,8,128,1,float16,float16,0,39.03363291422526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,fp8,0,3.7489067713419595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,fp8,0,0.4441759983698527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,fp8,0,0.5545119841893514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,40,4,128,1,float16,float16,0,18.635770161946613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,fp8,0,0.8868426481882731
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,40,40,128,1,float16,float16,0,9.572965621948242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,40,1,128,1,float16,float16,0,9.194906870524088
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,fp8,0,1.9229119618733723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,40,2,128,1,float16,float16,0,9.069045384724935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,40,4,128,1,float16,float16,0,9.094501495361328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,40,8,128,1,float16,float16,0,8.543589274088541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,40,8,128,1,float16,float16,0,19.873514811197918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,fp8,0,2.991461435953776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,fp8,0,3.3164745966593423
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,fp8,0,41.76797231038412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,fp8,0,20.143658955891926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,fp8,0,5.036538759867351
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,fp8,0,11.24959945678711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,fp8,0,1.4186399777730305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,40,40,128,1,float16,float16,0,28.641568501790363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,40,1,128,1,float16,float16,0,26.599894205729168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,fp8,0,1.7765332857767742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,fp8,0,2.592298666636149
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,40,2,128,1,float16,float16,0,25.987953186035156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,40,1,128,1,float16,float16,0,55.18413289388021
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,fp8,0,5.620992024739583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,40,2,128,1,float16,float16,0,56.33543395996094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,40,4,128,1,float16,float16,0,55.56036376953125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,40,8,128,1,float16,float16,0,53.088104248046875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,fp8,0,0.7300906976064047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,40,4,128,1,float16,float16,0,27.3184331258138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,fp8,0,0.9374826749165853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,fp8,0,59.26395161946615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,40,40,128,1,float16,float16,0,15.067082722981771
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,fp8,0,1.313157320022583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,40,8,128,1,float16,float16,0,26.7718989054362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,40,1,128,1,float16,float16,0,14.031002044677734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,40,2,128,1,float16,float16,0,13.050186157226562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,fp8,0,2.8159891764322915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,fp8,0,0.36605334281921387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,fp8,0,0.4444906711578369
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,40,40,128,1,float16,float16,0,6.551562627156575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,40,1,128,1,float16,float16,0,6.056853612263997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,fp8,0,0.6817546685536703
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,40,2,128,1,float16,float16,0,6.12986691792806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,40,4,128,1,float16,float16,0,13.134756724039713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,40,4,128,1,float16,float16,0,6.236885070800781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,fp8,0,1.5038080215454102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,40,8,128,1,float16,float16,0,14.452383677164713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,fp8,0,4.45028813680013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,fp8,0,29.70501963297526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,40,8,128,1,float16,float16,0,6.210336049397786
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,fp8,0,14.257887522379557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,fp8,0,5.3901011149088545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,fp8,0,7.356741587320964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,fp8,0,15.40671412150065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,fp8,0,2.387200037638346
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,40,40,128,1,float16,float16,0,37.43590291341146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,40,1,128,1,float16,float16,0,34.045850118001304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,fp8,0,2.740133285522461
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,fp8,0,3.9253600438435874
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,40,2,128,1,float16,float16,0,34.23133850097656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,40,1,128,1,float16,float16,0,71.59455871582031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,fp8,0,7.738698959350586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,40,2,128,1,float16,float16,0,71.27901204427083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,40,4,128,1,float16,float16,0,75.5694071451823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,fp8,0,1.1393919785817463
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,40,8,128,1,float16,float16,0,75.95098368326823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,40,4,128,1,float16,float16,0,35.235669453938804
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,fp8,0,1.3832213083902996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,fp8,0,75.05535888671875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,fp8,0,1.9595093727111816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,40,40,128,1,float16,float16,0,18.36630376180013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,40,1,128,1,float16,float16,0,17.71017074584961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,40,8,128,1,float16,float16,0,34.54233042399088
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,fp8,0,4.0204159418741865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,fp8,0,0.5923839807510376
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,40,2,128,1,float16,float16,0,17.37546666463216
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,fp8,0,0.6952266693115234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,40,40,128,1,float16,float16,0,8.67471440633138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,fp8,0,0.9937919775644938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,40,1,128,1,float16,float16,0,8.079343795776367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,40,4,128,1,float16,float16,0,18.571205139160156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,40,2,128,1,float16,float16,0,7.765541076660156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,fp8,0,2.0212319691975913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,40,8,128,1,float16,float16,0,18.94927469889323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,40,4,128,1,float16,float16,0,8.453754425048828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,fp8,0,0.1699999968210856
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,40,40,128,1,float16,float16,0,4.117578824361165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,fp8,0,0.31060266494750977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,40,8,128,1,float16,float16,0,8.2980105082194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,40,1,128,1,float16,float16,0,3.427493413289388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,fp8,0,0.5232426722844442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,fp8,0,19.000111897786457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,40,2,128,1,float16,float16,0,3.6572478612264
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,fp8,0,1.0638346672058105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,40,4,128,1,float16,float16,0,3.4149653116861978
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,40,8,128,1,float16,float16,0,3.748250643412272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,fp8,0,39.31952412923177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,fp8,0,9.116554896036783
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,fp8,0,3.560837427775065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,fp8,0,3.7613439559936523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,fp8,0,5.137610753377278
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,fp8,0,9.682783762613932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,fp8,0,1.7851039568583171
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,40,40,128,1,float16,float16,0,21.125717163085938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,40,1,128,1,float16,float16,0,19.432095845540363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,fp8,0,2.0063093503316245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,fp8,0,2.7000907262166343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,40,1,128,1,float16,float16,0,39.01087951660156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,40,2,128,1,float16,float16,0,19.917029062906902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,40,2,128,1,float16,float16,0,42.58313496907552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,40,4,128,1,float16,float16,0,40.10124206542969
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,fp8,0,4.864543914794922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,40,8,128,1,float16,float16,0,42.68242899576823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,fp8,0,0.8281226952870687
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,fp8,0,0.9999679724375407
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,40,4,128,1,float16,float16,0,19.57650629679362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,40,40,128,1,float16,float16,0,10.525813420613607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,fp8,0,1.3384159406026204
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,fp8,0,43.19569396972656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,40,1,128,1,float16,float16,0,10.15939203898112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,fp8,0,2.515413284301758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,40,2,128,1,float16,float16,0,9.846021016438803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,fp8,0,0.42189331849416095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,40,8,128,1,float16,float16,0,20.59881591796875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,40,4,128,1,float16,float16,0,9.885808308919271
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,40,40,128,1,float16,float16,0,4.677189191182454
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,40,1,128,1,float16,float16,0,4.07371203104655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,fp8,0,0.5052533149719238
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,fp8,0,0.6662933429082235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,40,8,128,1,float16,float16,0,10.144037246704102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,40,2,128,1,float16,float16,0,4.1426239013671875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,fp8,0,1.205189307530721
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,40,4,128,1,float16,float16,0,4.168944040934245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,40,8,128,1,float16,float16,0,4.198191960652669
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,40,40,128,1,float16,float16,0,2.5643040339152017
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,fp8,0,0.07431999842325847
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,40,1,128,1,float16,float16,0,1.8988693555196126
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,fp8,0,11.137077331542969
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,fp8,0,0.12802666425704956
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,fp8,0,0.25840532779693604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,fp8,0,22.66893768310547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,40,2,128,1,float16,float16,0,1.9965440432230632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,fp8,0,0.6546719868977865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,40,4,128,1,float16,float16,0,1.950032075246175
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,fp8,0,4.826021194458008
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,40,8,128,1,float16,float16,0,2.14030392964681
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,fp8,0,4.336858749389648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,fp8,0,4.8015947341918945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,fp8,0,6.2184797922770185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,fp8,0,10.367167790730795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,fp8,0,2.1743200620015464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,40,1,128,1,float16,float16,0,18.615652720133465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,40,40,128,1,float16,float16,0,20.98095957438151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,fp8,0,2.439136028289795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,fp8,0,3.114543914794922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,40,1,128,1,float16,float16,0,37.45797220865885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,40,4,128,1,float16,float16,0,38.13154093424479
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,40,2,128,1,float16,float16,0,40.539232889811196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,40,2,128,1,float16,float16,0,18.90216573079427
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,40,8,128,1,float16,float16,0,39.19180806477865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,fp8,0,5.303615887959798
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,fp8,0,1.1632213592529297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,fp8,0,1.2847253481547039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,40,4,128,1,float16,float16,0,19.293392181396484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,40,1,128,1,float16,float16,0,8.834309260050455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,fp8,0,1.6601653099060059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,40,40,128,1,float16,float16,0,10.260559717814127
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,40,2,128,1,float16,float16,0,8.521573384602865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,fp8,0,39.75923156738281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,40,8,128,1,float16,float16,0,18.97884750366211
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,fp8,0,2.796559969584147
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,40,4,128,1,float16,float16,0,7.613791783650716
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,40,40,128,1,float16,float16,0,4.591477394104004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,fp8,0,0.6210933526357015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,40,1,128,1,float16,float16,0,3.924053192138672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,fp8,0,0.7778666814168295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,40,8,128,1,float16,float16,0,8.845424016316732
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,fp8,0,0.5777386824289957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,40,2,128,1,float16,float16,0,3.473269462585449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,fp8,0,20.981792449951172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,fp8,0,1.3254400094350178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,fp8,0,0.16759467124938965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,40,4,128,1,float16,float16,0,3.623157183329264
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,fp8,0,9.930389404296875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,40,1,128,1,float16,float16,0,2.0335572560628257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,40,8,128,1,float16,float16,0,3.9801918665568032
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,fp8,0,0.2664960026741028
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,fp8,0,0.41684265931447345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,40,2,128,1,float16,float16,0,1.837733268737793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,40,40,128,1,float16,float16,0,2.288501262664795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,fp8,0,0.7074133555094401
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,40,4,128,1,float16,float16,0,1.9237707455952961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,40,2,128,1,float16,float16,0,0.9677866299947103
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,fp8,0,4.32095464070638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,40,8,128,1,float16,float16,0,1.969648043314616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,fp8,0,0.061568001906077065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,fp8,0,2.148341337839762
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,fp8,0,0.04548799991607666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,40,1,128,1,float16,float16,0,0.875279982884725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,40,40,128,1,float16,float16,0,1.216597318649292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,fp8,0,0.12397866447766621
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,40,4,128,1,float16,float16,0,0.9164853096008301
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,fp8,0,0.28458666801452637
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,40,8,128,1,float16,float16,0,0.9740586280822754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,fp8,0,3.7639945348103843
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,fp8,0,4.433290799458821
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,fp8,0,3.4777278900146484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,fp8,0,6.989695866902669
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,fp8,0,1.7351627349853516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,40,1,128,1,float16,float16,0,10.018501281738281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,40,2,128,1,float16,float16,0,22.14367930094401
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,40,40,128,1,float16,float16,0,12.041765848795572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,40,4,128,1,float16,float16,0,22.76233164469401
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,fp8,0,1.8097119331359863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,40,8,128,1,float16,float16,0,22.131306966145832
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,fp8,0,2.364528020222982
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,40,1,128,1,float16,float16,0,21.759061177571613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,fp8,0,3.6614294052124023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,fp8,0,0.8708960215250651
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,40,2,128,1,float16,float16,0,10.038810729980469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,40,1,128,1,float16,float16,0,4.33953062693278
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,40,4,128,1,float16,float16,0,10.746037801106771
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,fp8,0,0.9142399628957113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,40,40,128,1,float16,float16,0,5.821610768636067
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,fp8,0,24.467567443847656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,40,8,128,1,float16,float16,0,10.562949498494467
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,fp8,0,1.1775840123494465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,40,2,128,1,float16,float16,0,4.324565251668294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,fp8,0,1.8366880416870117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,40,4,128,1,float16,float16,0,4.932784080505371
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,40,8,128,1,float16,float16,0,4.59991995493571
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,fp8,0,0.4357813199361165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,40,40,128,1,float16,float16,0,2.838794708251953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,fp8,0,0.4457333485285441
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,40,1,128,1,float16,float16,0,2.17793607711792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,fp8,0,12.788864135742188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,40,2,128,1,float16,float16,0,2.3317333857218423
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,fp8,0,0.5672426621119181
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,fp8,0,0.8747680187225342
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,40,4,128,1,float16,float16,0,2.0758773485819497
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,40,8,128,1,float16,float16,0,2.153264045715332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,40,40,128,1,float16,float16,0,1.4797760645548503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,fp8,0,5.146517435709636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,fp8,0,0.06211733321348826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,40,1,128,1,float16,float16,0,1.036357323328654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,fp8,0,0.09578133622805278
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,40,2,128,1,float16,float16,0,1.192576011021932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,fp8,0,0.21030400196711221
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,40,4,128,1,float16,float16,0,1.1172640323638916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,40,40,128,1,float16,float16,0,0.7197226683298746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,fp8,0,2.6329867045084634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,40,8,128,1,float16,float16,0,1.1024586359659831
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,fp8,0,0.43383999665578205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,fp8,0,0.05198933184146881
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,40,1,128,1,float16,float16,0,0.5581653515497843
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,fp8,0,0.036271999279658
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,40,4,128,1,float16,float16,0,0.5398773352305094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,fp8,0,0.09267733494440715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,40,2,128,1,float16,float16,0,0.5381706555684408
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,40,8,128,1,float16,float16,0,0.548416018486023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,fp8,0,1.3282506465911865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,fp8,0,0.19928000370661417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,fp8,0,4.328591982523601
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,fp8,0,5.870922724405925
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,fp8,0,5.02350393931071
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,40,4,128,1,float16,float16,0,21.79828389485677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,40,40,128,1,float16,float16,0,12.233348846435547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,40,1,128,1,float16,float16,0,21.892934163411457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,fp8,0,8.604688008626303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,40,8,128,1,float16,float16,0,22.776405334472656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,fp8,0,2.2065653800964355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,40,1,128,1,float16,float16,0,8.76248550415039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,fp8,0,2.3872373898824057
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,40,2,128,1,float16,float16,0,21.167696634928387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,fp8,0,2.976901372273763
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,fp8,0,4.16867733001709
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,40,2,128,1,float16,float16,0,10.077093124389648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,40,8,128,1,float16,float16,0,8.406501134236654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,fp8,0,24.19988250732422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,40,40,128,1,float16,float16,0,5.973178863525391
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,40,1,128,1,float16,float16,0,4.099258740743001
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,fp8,0,1.1618666648864746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,40,4,128,1,float16,float16,0,9.481088002522787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,fp8,0,1.1845173041025798
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,fp8,0,1.488437334696452
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,fp8,0,2.0978612899780273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,40,4,128,1,float16,float16,0,4.339797337849935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,40,8,128,1,float16,float16,0,4.45194149017334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,40,1,128,1,float16,float16,0,2.0402132670084634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,fp8,0,0.574506680170695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,40,2,128,1,float16,float16,0,4.0272213617960615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,40,40,128,1,float16,float16,0,2.989034652709961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,fp8,0,0.6183840036392212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,fp8,0,0.7102506955464681
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,40,2,128,1,float16,float16,0,1.9349385897318523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,40,4,128,1,float16,float16,0,2.0692747433980307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,fp8,0,5.227802594502767
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,fp8,0,1.023370663324992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,fp8,0,0.15775466958681741
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,40,8,128,1,float16,float16,0,2.0898292859395347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,40,1,128,1,float16,float16,0,0.9671306610107422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,40,40,128,1,float16,float16,0,1.4945707321166992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,40,2,128,1,float16,float16,0,1.0161759853363037
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,fp8,0,0.2417280077934265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,fp8,0,10.757999420166016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,40,4,128,1,float16,float16,0,1.023967981338501
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,fp8,0,0.3465546766916911
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,40,8,128,1,float16,float16,0,1.032480001449585
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,fp8,0,0.4675146738688151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,40,40,128,1,float16,float16,0,0.7195786635080973
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,fp8,0,2.6437066396077475
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,fp8,0,0.04773333172003428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,fp8,0,0.03844266633192698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,fp8,0,0.08796800176302592
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,40,2,128,1,float16,float16,0,0.5067040125528971
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,40,1,128,1,float16,float16,0,0.5120799938837687
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,40,4,128,1,float16,float16,0,0.5284106731414795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,fp8,0,0.16566399733225504
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,40,40,128,1,float16,float16,0,0.3766186634699504
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,fp8,0,0.028416000306606293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,40,8,128,1,float16,float16,0,0.5430933237075806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,40,1,128,1,float16,float16,0,0.2702239950497945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,fp8,0,0.033615998923778534
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,40,2,128,1,float16,float16,0,0.26606400807698566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,fp8,0,1.355002721150716
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,40,4,128,1,float16,float16,0,0.2823306719462077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,fp8,0,0.6384640137354533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,fp8,0,0.057445332407951355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,40,8,128,1,float16,float16,0,0.27589333057403564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,fp8,0,0.10077333450317383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,fp8,0,4.437045415242513
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,fp8,0,3.471738815307617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,fp8,0,3.5080267588297525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,40,1,128,1,float16,float16,0,11.370564778645834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,40,4,128,1,float16,float16,0,12.556869506835938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,40,8,128,1,float16,float16,0,12.973733266194662
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,fp8,0,5.763925552368164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,40,2,128,1,float16,float16,0,12.871888478597006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,fp8,0,1.7359840075174968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,40,1,128,1,float16,float16,0,5.103226661682129
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,fp8,0,1.7528533935546875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,40,40,128,1,float16,float16,0,7.6539357503255205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,40,2,128,1,float16,float16,0,4.788474718729655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,40,4,128,1,float16,float16,0,5.2345279057820635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,fp8,0,2.871978759765625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,40,40,128,1,float16,float16,0,3.91045347849528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,40,8,128,1,float16,float16,0,5.3167680104573565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,fp8,0,0.8212266763051351
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,fp8,0,13.857162475585938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,fp8,0,2.2208053270975747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,40,2,128,1,float16,float16,0,2.227562745412191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,fp8,0,1.0193440119425456
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,fp8,0,0.932703971862793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,40,1,128,1,float16,float16,0,2.204528013865153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,fp8,0,6.449285507202148
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,40,8,128,1,float16,float16,0,2.7648585637410483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,40,4,128,1,float16,float16,0,2.5888373057047525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,fp8,0,0.40065598487854004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,fp8,0,1.520917256673177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,40,40,128,1,float16,float16,0,1.924506664276123
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,fp8,0,0.42029865582784015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,40,1,128,1,float16,float16,0,1.1674453417460124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,fp8,0,0.4928799867630005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,40,2,128,1,float16,float16,0,1.1502026716868083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,40,4,128,1,float16,float16,0,1.1965546607971191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,fp8,0,0.7084746360778809
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,40,40,128,1,float16,float16,0,0.9359253247578939
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,fp8,0,3.231797218322754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,40,1,128,1,float16,float16,0,0.6095146735509237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,40,2,128,1,float16,float16,0,0.6028533379236857
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,fp8,0,0.05205333232879639
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,fp8,0,0.07482666770617168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,40,4,128,1,float16,float16,0,0.6287466684977213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,40,8,128,1,float16,float16,0,1.3162826697031658
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,fp8,0,0.15726932883262634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,fp8,0,0.31284799178441364
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,40,1,128,1,float16,float16,0,0.2957119941711426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,40,40,128,1,float16,float16,0,0.487338662147522
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,40,8,128,1,float16,float16,0,0.6588213443756104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,40,2,128,1,float16,float16,0,0.34009599685668945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,fp8,0,0.03736533224582672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,40,4,128,1,float16,float16,0,0.3089066743850708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,fp8,0,0.031194667021433514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,fp8,0,1.6849600474039714
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,40,8,128,1,float16,float16,0,0.3086293339729309
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,fp8,0,0.06693333387374878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,fp8,0,0.13388267159461975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,40,40,128,1,float16,float16,0,0.19375999768575033
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,fp8,0,0.024351999163627625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,40,4,128,1,float16,float16,0,0.1779093345006307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,40,1,128,1,float16,float16,0,0.16940265893936157
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,40,2,128,1,float16,float16,0,0.16892266273498535
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,fp8,0,0.02916266769170761
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,fp8,0,0.03990400085846583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,fp8,0,0.8180586496988932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,fp8,0,0.37172265847524005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,40,8,128,1,float16,float16,0,0.17511467138926187
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,fp8,0,0.07991999884446462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,fp8,0,4.315418561299642
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,fp8,0,4.690976142883301
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,fp8,0,7.594869613647461
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,fp8,0,5.484399795532227
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,40,8,128,1,float16,float16,0,13.009717305501303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,40,2,128,1,float16,float16,0,11.523184458414713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,40,1,128,1,float16,float16,0,4.558352152506511
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,fp8,0,2.1661012967427573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,40,1,128,1,float16,float16,0,12.773264567057291
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,40,40,128,1,float16,float16,0,8.422576268513998
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,fp8,0,2.360597292582194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,40,2,128,1,float16,float16,0,4.781930605570476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,40,4,128,1,float16,float16,0,12.549152374267578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,fp8,0,2.7706772486368814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,40,4,128,1,float16,float16,0,5.3677012125651045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,fp8,0,3.7966079711914062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,fp8,0,13.964251200358072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,40,40,128,1,float16,float16,0,4.191210746765137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,40,8,128,1,float16,float16,0,4.996410687764485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,fp8,0,1.074954668680827
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,fp8,0,1.248687982559204
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,40,1,128,1,float16,float16,0,2.123568058013916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,fp8,0,1.7860533396402996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,fp8,0,1.45250670115153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,40,4,128,1,float16,float16,0,2.3484692573547363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,fp8,0,6.819845199584961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,40,1,128,1,float16,float16,0,1.1490026315053303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,40,8,128,1,float16,float16,0,2.6420532862345376
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,40,2,128,1,float16,float16,0,2.227498690287272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,fp8,0,0.5668266614278158
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,40,40,128,1,float16,float16,0,2.198842684427897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,fp8,0,0.644325335820516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,fp8,0,0.5746826728185018
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,40,8,128,1,float16,float16,0,1.2831947008768718
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,40,4,128,1,float16,float16,0,1.1779413223266602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,40,2,128,1,float16,float16,0,1.1273120244344075
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,40,40,128,1,float16,float16,0,1.0672907034556072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,fp8,0,0.9095893700917562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,fp8,0,3.524863878885905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,40,1,128,1,float16,float16,0,0.5941760142644247
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,fp8,0,0.2371679941813151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,fp8,0,0.15351466337839761
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,40,2,128,1,float16,float16,0,0.6086133321126302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,40,8,128,1,float16,float16,0,0.6475946505864462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,fp8,0,0.3161120017369588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,40,4,128,1,float16,float16,0,0.6191413402557373
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,40,1,128,1,float16,float16,0,0.29313067595163983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,fp8,0,1.7725280125935872
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,40,40,128,1,float16,float16,0,0.5369919935862223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,fp8,0,0.3991893529891968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,fp8,0,0.04053333401679993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,fp8,0,0.03357866654793421
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,40,2,128,1,float16,float16,0,0.29739199082056683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,fp8,0,0.06645333270231883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,40,4,128,1,float16,float16,0,0.32153600454330444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,fp8,0,0.1183733344078064
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,40,8,128,1,float16,float16,0,0.3434240023295085
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,40,40,128,1,float16,float16,0,0.25490132967631024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,fp8,0,0.8338239987691244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,40,2,128,1,float16,float16,0,0.15558933218320212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,fp8,0,0.02272533377011617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,40,1,128,1,float16,float16,0,0.1551413337389628
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,fp8,0,0.02762666592995326
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,40,8,128,1,float16,float16,0,0.1601759990056356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,fp8,0,0.040805332362651825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,40,4,128,1,float16,float16,0,0.16325866182645163
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,fp8,0,0.06522666911284129
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,40,40,128,1,float16,float16,0,0.09674666325251262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,40,1,128,1,float16,float16,0,0.08833600083986919
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,40,2,128,1,float16,float16,0,0.090229332447052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,fp8,0,0.39417600631713867
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,fp8,0,0.08991466959317525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,fp8,0,0.10300266742706299
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,40,4,128,1,float16,float16,0,0.09156800309816997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,fp8,0,0.09345066547393799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,40,8,128,1,float16,float16,0,0.09297600388526917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,fp8,0,0.12350400288899739
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,fp8,0,0.27060266335805255
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,fp8,0,4.318282763163249
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,40,1,128,1,float16,float16,0,7.842618942260742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,fp8,0,5.030240058898926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,40,2,128,1,float16,float16,0,7.947973251342773
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,fp8,0,5.906496047973633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,40,40,128,1,float16,float16,0,7.993701299031575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,40,1,128,1,float16,float16,0,2.705519994099935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,40,4,128,1,float16,float16,0,8.572682698567709
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,fp8,0,2.159594694773356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,fp8,0,2.336671988169352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,40,2,128,1,float16,float16,0,2.8762025833129883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,40,4,128,1,float16,float16,0,3.174949328104655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,fp8,0,2.781834602355957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,fp8,0,10.368895848592123
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,40,8,128,1,float16,float16,0,9.326853434244791
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,40,1,128,1,float16,float16,0,1.350154717763265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,40,8,128,1,float16,float16,0,3.7438348134358725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,fp8,0,1.1753066380818684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,40,40,128,1,float16,float16,0,4.015343983968099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,fp8,0,3.586581230163574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,fp8,0,7.186431884765625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,fp8,0,1.1811199982961018
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,fp8,0,1.4452853202819824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,40,4,128,1,float16,float16,0,1.5822933514912922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,40,8,128,1,float16,float16,0,1.912826697031657
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,fp8,0,1.7828267415364583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,40,1,128,1,float16,float16,0,0.6942880153656006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,fp8,0,5.293391863505046
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,fp8,0,2.6859572728474936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,fp8,0,0.5360906521479288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,40,4,128,1,float16,float16,0,0.7908746401468912
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,fp8,0,0.6935199896494547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,fp8,0,0.5633493264516195
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,40,1,128,1,float16,float16,0,0.3646026849746704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,40,40,128,1,float16,float16,0,1.0126240253448486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,40,2,128,1,float16,float16,0,0.7399146556854248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,40,8,128,1,float16,float16,0,0.9163359800974528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,fp8,0,0.14914666612943014
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,fp8,0,0.8467466831207275
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,40,2,128,1,float16,float16,0,1.4292426109313965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,40,2,128,1,float16,float16,0,0.37859201431274414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,fp8,0,0.23677333196004233
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,fp8,0,0.28806400299072266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,fp8,0,1.2926452954610188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,fp8,0,0.3523840109507243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,40,8,128,1,float16,float16,0,0.4344053268432617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,40,1,128,1,float16,float16,0,0.17177599668502808
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,40,4,128,1,float16,float16,0,0.41012267271677655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,40,40,128,1,float16,float16,0,0.5278613169987997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,fp8,0,0.033386667569478355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,40,4,128,1,float16,float16,0,0.20515199502309164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,40,2,128,1,float16,float16,0,0.17891732851664224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,fp8,0,0.037920000652472176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,fp8,0,0.059861332178115845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,40,40,128,1,float16,float16,0,0.19724265734354654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,fp8,0,0.5840640068054199
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,fp8,0,0.0983733336130778
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,40,8,128,1,float16,float16,0,0.2308853268623352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,fp8,0,0.24674133459726968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,40,1,128,1,float16,float16,0,0.09122666716575623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,fp8,0,0.024133334557215374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,fp8,0,0.022661333282788593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,40,40,128,1,float16,float16,0,2.0229066212972007
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,40,2,128,1,float16,float16,0,0.09390933314959209
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,40,4,128,1,float16,float16,0,0.09891200065612793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,fp8,0,0.034416000048319496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,fp8,0,0.04839999973773956
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,40,40,128,1,float16,float16,0,0.06320000191529591
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,40,8,128,1,float16,float16,0,0.09816533327102661
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,40,1,128,1,float16,float16,0,0.05235200126965841
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,40,2,128,1,float16,float16,0,0.05268266797065735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,fp8,0,0.08893866340319316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,40,4,128,1,float16,float16,0,0.05509866774082184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,fp8,0,0.0902933379014333
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,fp8,0,0.10987733801205952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,fp8,0,0.09738133351008098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,40,1,128,1,float16,float16,0,0.030576000610987347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,40,8,128,1,float16,float16,0,0.05598400036493937
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,40,40,128,1,float16,float16,0,0.03489600121974945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,fp8,0,0.05266666909058889
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,fp8,0,0.052469333012898765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,40,4,128,1,float16,float16,0,0.031957333286603294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,40,2,128,1,float16,float16,0,0.03067733347415924
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,fp8,0,0.19793599843978882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,fp8,0,0.10732266306877136
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,40,8,128,1,float16,float16,0,0.03221333275238673
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,fp8,0,0.05755733450253805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,fp8,0,0.06086933116118113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,fp8,0,2.3155946731567383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,40,1,128,1,float16,float16,0,2.622661272684733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,fp8,0,2.34388796488444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,40,2,128,1,float16,float16,0,2.676074663798014
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,40,4,128,1,float16,float16,0,3.029952049255371
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,40,8,128,1,float16,float16,0,3.679877281188965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,fp8,0,2.9595359166463218
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,40,1,128,1,float16,float16,0,1.1967626412709553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,fp8,0,1.0953120390574138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,40,40,128,1,float16,float16,0,4.340421358744304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,40,2,128,1,float16,float16,0,1.2022720177968342
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,fp8,0,4.317402521769206
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,40,4,128,1,float16,float16,0,1.3586559295654297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,fp8,0,1.1722986698150635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,40,1,128,1,float16,float16,0,0.541978677113851
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,fp8,0,0.5350293318430582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,40,40,128,1,float16,float16,0,2.0197653770446777
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,fp8,0,1.4411999384562175
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,fp8,0,2.148757298787435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,40,8,128,1,float16,float16,0,1.6635732650756836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,40,2,128,1,float16,float16,0,0.5864906708399454
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,fp8,0,0.5637333393096924
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,fp8,0,1.7933600743611653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,40,4,128,1,float16,float16,0,0.7142879962921143
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,40,8,128,1,float16,float16,0,0.8979413509368896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,fp8,0,3.8449494043986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,40,40,128,1,float16,float16,0,1.0160106817881267
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,fp8,0,0.6934239864349365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,fp8,0,0.8581866423288981
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,fp8,0,0.15491732954978943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,fp8,0,1.00437331199646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,40,40,128,1,float16,float16,0,0.4873973528544108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,fp8,0,0.3423466682434082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,40,4,128,1,float16,float16,0,0.3445386489232381
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,fp8,0,0.2841920057932536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,40,8,128,1,float16,float16,0,0.4007360140482585
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,40,2,128,1,float16,float16,0,0.12501333157221475
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,fp8,0,0.033088001112143196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,40,1,128,1,float16,float16,0,0.11883733669916789
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,fp8,0,0.03794133414824804
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,40,2,128,1,float16,float16,0,0.3063253362973531
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,fp8,0,0.43432001272837323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,40,4,128,1,float16,float16,0,0.1393333375453949
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,40,1,128,1,float16,float16,0,0.28961066404978436
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,fp8,0,0.05463466544946035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,40,8,128,1,float16,float16,0,0.17428267002105713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,fp8,0,0.07580266892910004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,40,1,128,1,float16,float16,0,0.06453333298365276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,40,2,128,1,float16,float16,0,0.06671466430028279
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,40,40,128,1,float16,float16,0,0.17040000359217325
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,fp8,0,0.022485333184401195
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,fp8,0,0.024351999163627625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,40,4,128,1,float16,float16,0,0.06992533306280772
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,fp8,0,0.18126932779947916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,fp8,0,0.08925333619117737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,40,1,128,1,float16,float16,0,0.035530666510264076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,40,8,128,1,float16,float16,0,0.07074666519959767
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,40,40,128,1,float16,float16,0,0.04363733530044556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,fp8,0,0.03162133445342382
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,fp8,0,0.03968533376852671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,fp8,0,0.22664000590642294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,40,2,128,1,float16,float16,0,0.036320000886917114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,fp8,0,0.09001066287358601
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,fp8,0,0.16532267133394876
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,40,4,128,1,float16,float16,0,0.03825066735347112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,40,40,128,1,float16,float16,0,0.025978667040665943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,40,8,128,1,float16,float16,0,0.038831998904546104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,fp8,0,0.09384000301361084
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,fp8,0,0.1032319962978363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,40,2,128,1,float16,float16,0,0.021989333132902782
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,40,1,128,1,float16,float16,0,0.022090665996074677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,fp8,0,0.05186666548252106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,fp8,0,0.0517493337392807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,40,4,128,1,float16,float16,0,0.02298133323589961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,40,8,128,1,float16,float16,0,0.02298133323589961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,fp8,0,0.0848640004793803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,fp8,0,0.05392000079154968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,40,40,128,1,float16,float16,0,0.018197332819302876
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,40,1,128,1,float16,float16,0,0.01613333324591319
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,fp8,0,0.03326933334271113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,fp8,0,0.03346666693687439
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,40,8,128,1,float16,float16,0,0.016480000068744022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,fp8,0,0.05725333094596863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,40,2,128,1,float16,float16,0,0.01617066686352094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,40,4,128,1,float16,float16,0,0.016447999825080235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,fp8,0,0.0330826664964358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,fp8,0,0.04573333263397217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,fp8,0,0.03711466739575068
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,40,1,128,1,float16,float16,0,1.1560746828715007
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,fp8,0,1.1632426579793294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,fp8,0,1.165226697921753
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,40,2,128,1,float16,float16,0,1.3396746317545574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,40,4,128,1,float16,float16,0,1.4204319318135579
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,40,8,128,1,float16,float16,0,1.734997272491455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,fp8,0,1.4707040786743164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,fp8,0,0.5354079802831014
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,40,1,128,1,float16,float16,0,0.5952266852060953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,40,40,128,1,float16,float16,0,2.1817866961161294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,fp8,0,2.0201172828674316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,fp8,0,1.8121813138326008
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,fp8,0,0.6068533261617025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,40,4,128,1,float16,float16,0,0.6738933722178141
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,fp8,0,0.6398346821467081
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,40,2,128,1,float16,float16,0,0.6068426767985026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,40,1,128,1,float16,float16,0,0.2818880081176758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,fp8,0,0.14989866813023886
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,40,40,128,1,float16,float16,0,1.0099039872487385
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,40,8,128,1,float16,float16,0,0.8599733511606852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,40,2,128,1,float16,float16,0,0.28566932678222656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,fp8,0,0.22316267093022665
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,fp8,0,0.913653294245402
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,40,4,128,1,float16,float16,0,0.3236746589342753
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,fp8,0,0.28437866767247516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,40,40,128,1,float16,float16,0,0.4865866502126058
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,40,8,128,1,float16,float16,0,0.41486398379007977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,fp8,0,0.34409598509470624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,fp8,0,1.0063412984212239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,fp8,0,0.03258133431275686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,40,2,128,1,float16,float16,0,0.10154666503270467
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,fp8,0,0.038704000413417816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,40,40,128,1,float16,float16,0,0.15402666727701822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,40,4,128,1,float16,float16,0,0.11776533722877502
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,fp8,0,0.3930293321609497
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,40,1,128,1,float16,float16,0,0.09553066889444987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,fp8,0,0.05487466851870219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,fp8,0,0.06494399905204773
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,fp8,0,0.02239999920129776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,40,1,128,1,float16,float16,0,0.050053333242734276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,fp8,0,0.024336000283559162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,fp8,0,0.03154666721820831
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,40,2,128,1,float16,float16,0,0.051882664362589516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,40,4,128,1,float16,float16,0,0.05669866502285004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,40,8,128,1,float16,float16,0,0.05610666672388712
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,40,8,128,1,float16,float16,0,0.1662986675898234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,fp8,0,0.036346666514873505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,fp8,0,0.14823466539382935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,40,40,128,1,float16,float16,0,0.036720000207424164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,40,1,128,1,float16,float16,0,0.028624000648657482
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,fp8,0,0.01747200017174085
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,40,2,128,1,float16,float16,0,0.029487999776999157
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,40,4,128,1,float16,float16,0,0.03146133323510488
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,fp8,0,0.021541332205136616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,40,40,128,1,float16,float16,0,0.02257599929968516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,fp8,0,0.022698665658632915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,40,8,128,1,float16,float16,0,0.031632001201311745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,fp8,0,0.01850133389234543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,fp8,0,0.07235200206438701
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,40,1,128,1,float16,float16,0,0.018330667167901993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,fp8,0,0.03854399919509888
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,40,40,128,1,float16,float16,0,0.015557333827018738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,40,4,128,1,float16,float16,0,0.019621333728233974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,40,2,128,1,float16,float16,0,0.018698666244745255
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,40,8,128,1,float16,float16,0,0.019776000330845516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,fp8,0,0.01632533346613248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,fp8,0,0.016869333883126576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,40,1,128,1,float16,float16,0,0.013557333499193192
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,40,2,128,1,float16,float16,0,0.013424000392357508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,fp8,0,0.013760000467300415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,fp8,0,0.013744000345468521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,40,8,128,1,float16,float16,0,0.013642666240533194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,40,4,128,1,float16,float16,0,0.01368533323208491
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,fp8,0,0.014117332796255747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,40,40,128,1,float16,float16,0,0.01003200002014637
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,fp8,0,0.025631998976071674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,fp8,0,0.014058666924635569
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,40,1,128,1,float16,float16,0,0.009141333401203156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,40,2,128,1,float16,float16,0,0.00921066664159298
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,40,4,128,1,float16,float16,0,0.009290666629870733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,40,8,128,1,float16,float16,0,0.009269333134094873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,fp8,0,0.013280000537633896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,fp8,0,0.019098666807015736
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,fp8,0,0.01349866638580958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,fp8,0,0.5321493148803711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,40,1,128,1,float16,float16,0,0.5757493178049723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,fp8,0,0.5658346811930338
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,40,2,128,1,float16,float16,0,0.61735999584198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,fp8,0,0.6907733281453451
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,fp8,0,0.851850668589274
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,40,4,128,1,float16,float16,0,0.7284320195515951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,40,8,128,1,float16,float16,0,0.917242685953776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,40,1,128,1,float16,float16,0,0.2816320061683655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,40,40,128,1,float16,float16,0,1.0202613671620686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,fp8,0,0.1469439963499705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,40,8,128,1,float16,float16,0,0.4182186524073283
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,fp8,0,0.2258239984512329
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,40,2,128,1,float16,float16,0,0.28918399413426715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,40,40,128,1,float16,float16,0,0.48499735196431476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,40,4,128,1,float16,float16,0,0.3218239943186442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,fp8,0,0.2861599922180176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,fp8,0,0.35225598017374676
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,40,1,128,1,float16,float16,0,0.08764800429344177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,fp8,0,0.03272533416748047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,40,4,128,1,float16,float16,0,0.11521599690119426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,fp8,0,0.038218667109807335
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,40,8,128,1,float16,float16,0,0.15267200271288553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,fp8,0,0.0643039991458257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,fp8,0,0.3793333371480306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,40,40,128,1,float16,float16,0,0.16552533706029257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,40,2,128,1,float16,float16,0,0.09288000067075093
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,40,1,128,1,float16,float16,0,0.04731200138727824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,fp8,0,0.022250667214393616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,fp8,0,0.130949338277181
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,fp8,0,0.03146133323510488
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,40,8,128,1,float16,float16,0,0.05343466500441233
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,fp8,0,0.9498986403147379
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,40,4,128,1,float16,float16,0,0.052933335304260254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,fp8,0,0.05418133238951365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,40,40,128,1,float16,float16,0,0.03416533271471659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,fp8,0,0.024325333535671234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,fp8,0,0.01743999992807706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,fp8,0,0.018112000077962875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,fp8,0,0.049471999208132424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,40,4,128,1,float16,float16,0,0.029445332785447437
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,40,2,128,1,float16,float16,0,0.049360002080599465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,40,8,128,1,float16,float16,0,0.029530666768550873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,fp8,0,0.03631466627120972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,40,2,128,1,float16,float16,0,0.027535999814669292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,fp8,0,0.021242665747801464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,40,1,128,1,float16,float16,0,0.01714133347074191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,40,40,128,1,float16,float16,0,0.021498667697111767
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,fp8,0,0.015087999403476715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,40,2,128,1,float16,float16,0,0.017322666943073273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,fp8,0,0.022687998910744984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,40,4,128,1,float16,float16,0,0.018453333526849747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,40,8,128,1,float16,float16,0,0.018543999642133713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,fp8,0,0.01695999999841054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,40,1,128,1,float16,float16,0,0.012858666479587555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,fp8,0,0.016330666840076447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,fp8,0,0.030784000953038532
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,40,40,128,1,float16,float16,0,0.014826666563749313
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,fp8,0,0.01368533323208491
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,fp8,0,0.019082666685183842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,fp8,0,0.01368533323208491
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,40,2,128,1,float16,float16,0,0.012752000242471695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,40,4,128,1,float16,float16,0,0.013173333058754602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,40,40,128,1,float16,float16,0,0.00979200005531311
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,fp8,0,0.013898666948080063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,fp8,0,0.013909333695967993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,40,8,128,1,float16,float16,0,0.012986666212479273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,40,1,128,1,float16,float16,0,0.008874666566650072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,40,2,128,1,float16,float16,0,0.008853333070874214
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,40,4,128,1,float16,float16,0,0.00903466654320558
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,40,8,128,1,float16,float16,0,0.00892800030608972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,fp8,0,0.013157332936922709
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,40,40,128,1,float16,float16,0,0.009178666397929192
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,fp8,0,0.012784000486135483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,fp8,0,0.012608000387748083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,40,1,128,1,float16,float16,0,0.008586666857202848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,40,4,128,1,float16,float16,0,0.008634666601816813
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,40,2,128,1,float16,float16,0,0.008442666381597519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,40,1,128,1,float16,float16,0,0.026863999664783478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,40,8,128,1,float16,float16,0,0.008656000097592672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,fp8,0,0.014442666123310724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,fp8,0,0.19148266315460205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,40,2,128,1,float16,float16,0,0.3512959877649943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,fp8,0,0.2640746633211772
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,40,1,128,1,float16,float16,0,0.3466133276621501
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,40,4,128,1,float16,float16,0,0.3999679883321126
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,fp8,0,0.3352959950764974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,40,8,128,1,float16,float16,0,0.4450613260269165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,40,40,128,1,float16,float16,0,0.4744693438212077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,fp8,0,0.4522293408711751
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,fp8,0,0.40032533804575604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,fp8,0,0.037178667883078255
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,40,1,128,1,float16,float16,0,0.15229866902033487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,fp8,0,0.04334400097529093
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,40,2,128,1,float16,float16,0,0.1576640009880066
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,fp8,0,0.09872000416119893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,40,40,128,1,float16,float16,0,0.18821332852045694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,40,8,128,1,float16,float16,0,0.19986132780710855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,fp8,0,0.07208000123500824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,40,4,128,1,float16,float16,0,0.1745013395945231
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,40,1,128,1,float16,float16,0,0.07919999957084656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,40,2,128,1,float16,float16,0,0.08095466593901317
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,fp8,0,0.02622399975856145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,fp8,0,0.03640000025431315
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,fp8,0,0.05109333495299021
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,40,4,128,1,float16,float16,0,0.084389328956604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,40,40,128,1,float16,float16,0,0.050954664746920265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,40,8,128,1,float16,float16,0,0.08501866459846497
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,fp8,0,0.02834133307139079
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,40,1,128,1,float16,float16,0,0.043141335248947144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,fp8,0,0.020901332298914593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,40,2,128,1,float16,float16,0,0.044031997521718345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,fp8,0,0.19924799601236978
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,40,8,128,1,float16,float16,0,0.04584533472855886
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,fp8,0,0.0828959991534551
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,40,4,128,1,float16,float16,0,0.04606399933497111
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,fp8,0,0.022053333620230358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,40,40,128,1,float16,float16,0,0.02794133375088374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,fp8,0,0.025306666890780132
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,fp8,0,0.02773866554101308
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,40,1,128,1,float16,float16,0,0.024090667565663654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,fp8,0,0.018565333137909572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,40,8,128,1,float16,float16,0,0.025263999899228413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,40,2,128,1,float16,float16,0,0.0242399995525678
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,fp8,0,0.020879998803138733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,fp8,0,0.020224000016848247
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,40,4,128,1,float16,float16,0,0.025370667378107708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,fp8,0,0.04684799909591675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,40,40,128,1,float16,float16,0,0.018031999468803406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,fp8,0,0.019386666516462963
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,fp8,0,0.017952000101407368
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,40,1,128,1,float16,float16,0,0.01613333324591319
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,40,2,128,1,float16,float16,0,0.016261332978804905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,fp8,0,0.018266666680574417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,40,8,128,1,float16,float16,0,0.016517333686351776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,fp8,0,0.017605333278576534
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,40,4,128,1,float16,float16,0,0.016389333953460056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,fp8,0,0.03419733295838038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,40,40,128,1,float16,float16,0,0.012831999609867731
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,40,1,128,1,float16,float16,0,0.011952000359694162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,fp8,0,0.017557332913080852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,fp8,0,0.017509333789348602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,40,2,128,1,float16,float16,0,0.011866666376590729
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,40,4,128,1,float16,float16,0,0.012080000092585882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,40,8,128,1,float16,float16,0,0.012069333344697952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,fp8,0,0.017535999417304993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,fp8,0,0.016901332885026932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,fp8,0,0.01749333366751671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,40,1,128,1,float16,float16,0,0.008362666393319765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,40,40,128,1,float16,float16,0,0.008832000195980072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,fp8,0,0.017263999829689663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,fp8,0,0.020047999918460846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,40,2,128,1,float16,float16,0,0.008309333274761835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,40,4,128,1,float16,float16,0,0.008314666648705801
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,40,8,128,1,float16,float16,0,0.008357333640257517
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,fp8,0,0.017231999586025875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,fp8,0,0.01729600007335345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,fp8,0,0.016586666305859882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,fp8,0,0.016789333273967106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,fp8,0,0.01741333305835724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,40,1,128,1,float16,float16,0,0.008000000069538752
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,40,2,128,1,float16,float16,0,0.008026666939258575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,40,40,128,1,float16,float16,0,0.008687999720374743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,fp8,0,0.018661333868900936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,40,4,128,1,float16,float16,0,0.008080000057816505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,40,8,128,1,float16,float16,0,0.008165333420038223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,fp8,0,0.01803733284274737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,fp8,0,0.05633600056171417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,40,4,128,1,float16,float16,0,0.3036373257637024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,40,1,128,1,float16,float16,0,0.28201599915822345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,fp8,0,0.03958933303753535
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,fp8,0,0.09325333436330159
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,40,2,128,1,float16,float16,0,0.2899199922879537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,40,8,128,1,float16,float16,0,0.3343573411305745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,fp8,0,0.18016533056894937
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,40,40,128,1,float16,float16,0,0.24210667610168457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,40,1,128,1,float16,float16,0,0.1427786648273468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,40,2,128,1,float16,float16,0,0.14351999759674072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,fp8,0,0.28809066613515216
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,fp8,0,0.026906666656335194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,fp8,0,0.030250666042168934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,40,4,128,1,float16,float16,0,0.14863999684651694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,fp8,0,0.04967466493447622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,40,40,128,1,float16,float16,0,0.08188800017038982
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,40,1,128,1,float16,float16,0,0.07412800192832947
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,fp8,0,0.07464000085989635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,40,8,128,1,float16,float16,0,0.1485973298549652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,40,2,128,1,float16,float16,0,0.07455466687679291
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,fp8,0,0.020917333662509918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,fp8,0,0.1295199990272522
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,40,4,128,1,float16,float16,0,0.07663466533025105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,40,40,128,1,float16,float16,0,0.04496000210444132
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,fp8,0,0.027024000883102417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,fp8,0,0.02256533255179723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,fp8,0,0.04062933226426443
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,40,8,128,1,float16,float16,0,0.07725866635640462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,40,1,128,1,float16,float16,0,0.04085333396991094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,40,2,128,1,float16,float16,0,0.04073066761096319
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,40,4,128,1,float16,float16,0,0.04171200096607208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,fp8,0,0.018698666244745255
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,40,8,128,1,float16,float16,0,0.04197866717974345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,fp8,0,0.01854933301607768
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,fp8,0,0.020666666328907013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,40,40,128,1,float16,float16,0,0.02481599897146225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,fp8,0,0.0718560020128886
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,fp8,0,0.022554665803909302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,40,1,128,1,float16,float16,0,0.02306666721900304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,fp8,0,0.018181333939234417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,40,2,128,1,float16,float16,0,0.02288000037272771
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,fp8,0,0.017322666943073273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,40,8,128,1,float16,float16,0,0.023370665808518726
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,fp8,0,0.01752000053723653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,40,40,128,1,float16,float16,0,0.01632533346613248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,fp8,0,0.04090133309364319
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,fp8,0,0.01826133330663045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,40,4,128,1,float16,float16,0,0.023130667706330616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,40,1,128,1,float16,float16,0,0.015471999843915304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,fp8,0,0.017397332936525345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,fp8,0,0.031114667654037476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,40,8,128,1,float16,float16,0,0.015487999965747198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,fp8,0,0.01693333312869072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,fp8,0,0.016810666769742966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,40,4,128,1,float16,float16,0,0.015568000574906668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,40,40,128,1,float16,float16,0,0.01201066623131434
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,40,2,128,1,float16,float16,0,0.01533866673707962
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,fp8,0,0.017488000293572743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,40,1,128,1,float16,float16,0,0.011530666301647821
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,40,2,128,1,float16,float16,0,0.011418666690587997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,40,4,128,1,float16,float16,0,0.011525332927703857
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,40,8,128,1,float16,float16,0,0.011493333925803503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,fp8,0,0.017370666066805523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,40,40,128,1,float16,float16,0,0.008522666369875273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,fp8,0,0.017045332739750545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,fp8,0,0.016602666427691776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,40,1,128,1,float16,float16,0,0.007967999825874964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,fp8,0,0.019253333409627277
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,40,2,128,1,float16,float16,0,0.008080000057816505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,fp8,0,0.017082666357358296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,40,4,128,1,float16,float16,0,0.008058666562040647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,fp8,0,0.01714133347074191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,40,1,128,1,float16,float16,0,0.00795199970404307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,40,8,128,1,float16,float16,0,0.008037333066264788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,fp8,0,0.018309333672126133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,40,2,128,1,float16,float16,0,0.00790933333337307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,fp8,0,0.016607999801635742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,40,40,128,1,float16,float16,0,0.00842666688064734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,fp8,0,0.01661866654952367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,fp8,0,0.018186666071414948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,40,8,128,1,float16,float16,0,0.008000000069538752
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,fp8,0,0.017162666966517765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,40,4,128,1,float16,float16,0,0.008000000069538752
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,fp8,0,0.01730666682124138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,40,1,128,1,float16,float16,0,0.0144213338692983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,40,2,128,1,float16,float16,0,0.028090665737787884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,40,1,128,1,float16,fp8,0,0.018197332819302876
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,40,4,128,1,float16,float16,0,0.0421013335386912
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,40,2,128,1,float16,fp8,0,0.024469333390394848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,0,0.0359253336985906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,40,8,128,1,float16,float16,0,0.06663999954859416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,0,0.05530133346716563
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,40,1,128,1,float16,fp8,0,0.013973332941532135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,40,1,128,1,float16,float16,0,0.009935999910036722
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,40,2,128,1,float16,float16,0,0.018415999909241993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,40,40,128,1,float16,float16,0,0.12989333271980286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,fp8,0,0.1132426659266154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,40,4,128,1,float16,float16,0,0.025562666356563568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,40,2,128,1,float16,fp8,0,0.01597333326935768
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,0,0.02186666677395503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,40,1,128,1,float16,float16,0,0.00955200009047985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,40,40,128,1,float16,float16,0,0.06883733471234639
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,40,8,128,1,float16,float16,0,0.03932266682386398
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,40,2,128,1,float16,float16,0,0.014069333672523499
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,0,0.03352533280849457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,40,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,40,2,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,fp8,0,0.06356266637643178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,40,4,128,1,float16,float16,0,0.017450666675964992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,40,8,128,1,float16,float16,0,0.02458133300145467
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,40,40,128,1,float16,float16,0,0.03823466598987579
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,40,1,128,1,float16,fp8,0,0.0120319997270902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,0,0.02083733429511388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,40,1,128,1,float16,float16,0,0.009322666873534521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,40,2,128,1,float16,float16,0,0.013141332815090815
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,40,2,128,1,float16,fp8,0,0.012138667205969492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,40,40,128,1,float16,float16,0,0.021989333132902782
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,40,4,128,1,float16,float16,0,0.013253333667914072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,0,0.012266666938861212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,0,0.014266667266686758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,fp8,0,0.03841600070397059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,40,8,128,1,float16,float16,0,0.01687466725707054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,40,1,128,1,float16,float16,0,0.009082666908701261
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,40,2,128,1,float16,float16,0,0.012624000509579977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,40,1,128,1,float16,fp8,0,0.01139733319481214
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,40,2,128,1,float16,fp8,0,0.011685332904259363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,0,0.011717333147923151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,40,8,128,1,float16,float16,0,0.012997332960367203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,40,4,128,1,float16,float16,0,0.0129120002190272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,fp8,0,0.025968000292778015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,0,0.013690666606028875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,40,40,128,1,float16,float16,0,0.014922666052977243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,40,1,128,1,float16,float16,0,0.009018666421373686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,40,1,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,40,2,128,1,float16,float16,0,0.012714666624863943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,40,4,128,1,float16,float16,0,0.012549333274364471
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,40,2,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,40,8,128,1,float16,float16,0,0.012655999511480331
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,0,0.011402666568756104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,40,40,128,1,float16,float16,0,0.011365332951148352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,0,0.01333333303531011
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,40,1,128,1,float16,float16,0,0.008789333204428354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,40,1,128,1,float16,fp8,0,0.011231999844312668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,40,2,128,1,float16,float16,0,0.012335999558369318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,40,2,128,1,float16,fp8,0,0.011370666325092316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,40,4,128,1,float16,float16,0,0.01257066677014033
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,fp8,0,0.02049066623051961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,40,8,128,1,float16,float16,0,0.012661332885424295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,40,40,128,1,float16,float16,0,0.008261333530147871
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,40,2,128,1,float16,float16,0,0.011989332735538483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,40,1,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,fp8,0,0.016282666474580765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,40,2,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,40,1,128,1,float16,float16,0,0.008639999975760778
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,40,4,128,1,float16,float16,0,0.012128000458081564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,40,8,128,1,float16,float16,0,0.012047999848922094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,40,40,128,1,float16,float16,0,0.00814933329820633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,fp8,0,0.015722667177518208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,40,1,128,1,float16,float16,0,0.008757333581646284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,40,1,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,40,2,128,1,float16,float16,0,0.0086666668454806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,40,4,128,1,float16,float16,0,0.008736000085870424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,40,2,128,1,float16,fp8,0,0.010725333044926325
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,0,0.01137599969903628
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,0,0.010869332899649939
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,40,8,128,1,float16,float16,0,0.008623999853928884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,0,0.011328000575304031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,fp8,0,0.014197333405415217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,fp8,0,3.900266647338867
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,fp8,0,5.278767903645833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,fp8,0,9.693045298258463
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,fp8,0,31.524373372395832
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,fp8,0,1.959269364674886
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,32,32,128,1,float16,float16,0,54.568277994791664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,32,1,128,1,float16,float16,0,53.150716145833336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,fp8,0,2.7509279251098633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,fp8,0,5.1436160405476885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,32,2,128,1,float16,float16,0,54.34359232584635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,32,1,128,1,float16,float16,0,110.84324137369792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,32,4,128,1,float16,float16,0,111.09836832682292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,32,2,128,1,float16,float16,0,109.7190450032552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,fp8,0,15.991781870524088
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,fp8,0,1.0099626382191975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,32,8,128,1,float16,float16,0,114.52420043945312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,fp8,0,1.4924319585164387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,32,4,128,1,float16,float16,0,55.070068359375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,fp8,0,2.640597343444824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,32,8,128,1,float16,float16,0,51.98717244466146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,fp8,0,120.44105021158855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,32,1,128,1,float16,float16,0,25.924026489257812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,32,32,128,1,float16,float16,0,27.14630889892578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,fp8,0,7.691445032755534
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,fp8,0,0.49459731578826904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,32,2,128,1,float16,float16,0,26.30224100748698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,fp8,0,0.7392053604125977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,32,32,128,1,float16,float16,0,14.332623799641928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,32,4,128,1,float16,float16,0,26.775733947753906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,fp8,0,1.414463996887207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,32,1,128,1,float16,float16,0,12.829695383707682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,32,2,128,1,float16,float16,0,14.030736287434896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,32,8,128,1,float16,float16,0,26.607749938964844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,32,4,128,1,float16,float16,0,13.029925028483072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,fp8,0,3.6882667541503906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,fp8,0,2.8589652379353843
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,fp8,0,3.658090591430664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,32,8,128,1,float16,float16,0,13.607621510823568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,fp8,0,28.827311197916668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,fp8,0,6.398949305216472
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,fp8,0,58.838114420572914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,fp8,0,19.904399871826172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,fp8,0,1.5294826825459797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,32,32,128,1,float16,float16,0,32.540384928385414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,32,1,128,1,float16,float16,0,31.65216064453125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,fp8,0,1.9441812833150227
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,fp8,0,3.4465812047322593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,32,1,128,1,float16,float16,0,61.579121907552086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,32,2,128,1,float16,float16,0,62.47699483235677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,32,4,128,1,float16,float16,0,63.01616923014323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,32,2,128,1,float16,float16,0,30.047765096028645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,fp8,0,9.039178848266602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,fp8,0,0.7267733414967855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,32,8,128,1,float16,float16,0,65.56997680664062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,fp8,0,0.9760213692982992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,32,4,128,1,float16,float16,0,30.29749298095703
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,32,32,128,1,float16,float16,0,16.13639958699544
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,fp8,0,1.7969333330790203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,32,1,128,1,float16,float16,0,14.867467244466146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,fp8,0,63.85247802734375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,32,2,128,1,float16,float16,0,14.728069305419922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,fp8,0,4.39195187886556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,32,8,128,1,float16,float16,0,31.863983154296875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,fp8,0,0.36241598924001056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,fp8,0,0.5004479885101318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,32,4,128,1,float16,float16,0,15.471429189046225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,32,32,128,1,float16,float16,0,7.876752217610677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,fp8,0,1.044816017150879
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,32,1,128,1,float16,float16,0,7.244234720865886
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,fp8,0,2.211008071899414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,32,8,128,1,float16,float16,0,15.363594055175781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,32,2,128,1,float16,float16,0,7.039205551147461
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,32,4,128,1,float16,float16,0,6.911312103271484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,fp8,0,2.3571786880493164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,32,8,128,1,float16,float16,0,7.229418436686198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,fp8,0,2.960474650065104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,fp8,0,33.22962188720703
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,fp8,0,17.203712463378906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,fp8,0,4.895936012268066
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,fp8,0,13.567178090413412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,fp8,0,1.1852533022562664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,32,32,128,1,float16,float16,0,22.765055338541668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,32,1,128,1,float16,float16,0,21.708351135253906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,fp8,0,1.5329227447509766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,fp8,0,2.471951961517334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,32,1,128,1,float16,float16,0,43.17564900716146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,32,2,128,1,float16,float16,0,22.95006815592448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,32,2,128,1,float16,float16,0,43.67698669433594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,32,4,128,1,float16,float16,0,42.94146728515625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,fp8,0,6.546154657999675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,fp8,0,0.5982400178909302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,32,8,128,1,float16,float16,0,44.15040588378906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,32,4,128,1,float16,float16,0,21.90430450439453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,fp8,0,0.7646133104960123
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,fp8,0,1.2688319683074951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,32,1,128,1,float16,float16,0,10.31993039449056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,32,32,128,1,float16,float16,0,11.24618657430013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,fp8,0,45.124359130859375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,32,8,128,1,float16,float16,0,22.48450215657552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,fp8,0,3.3572746912638345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,32,2,128,1,float16,float16,0,9.678458531697592
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,fp8,0,0.1955733299255371
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,32,32,128,1,float16,float16,0,5.304997444152832
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,fp8,0,0.3744746843973796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,fp8,0,0.675002654393514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,32,1,128,1,float16,float16,0,4.475152015686035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,32,4,128,1,float16,float16,0,10.988858540852865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,32,8,128,1,float16,float16,0,10.589717229207357
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,32,2,128,1,float16,float16,0,4.8220265706380205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,fp8,0,2.0603466033935547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,32,4,128,1,float16,float16,0,4.500255902608235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,32,8,128,1,float16,float16,0,4.190309206644694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,fp8,0,11.001903533935547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,fp8,0,23.79223887125651
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,fp8,0,3.698282559712728
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,fp8,0,4.715493202209473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,fp8,0,6.964015960693359
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,fp8,0,17.58083724975586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,fp8,0,1.861637274424235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,32,32,128,1,float16,float16,0,29.88018035888672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,32,1,128,1,float16,float16,0,27.36200459798177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,fp8,0,2.390432039896647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,fp8,0,3.792837460835775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,32,2,128,1,float16,float16,0,27.190719604492188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,32,2,128,1,float16,float16,0,57.4567616780599
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,32,1,128,1,float16,float16,0,58.475006103515625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,32,8,128,1,float16,float16,0,58.11662292480469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,32,4,128,1,float16,float16,0,59.44573974609375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,fp8,0,8.733738581339518
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,fp8,0,0.9541920026143392
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,32,4,128,1,float16,float16,0,28.090655008951824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,fp8,0,1.1489973068237305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,fp8,0,60.757171630859375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,32,32,128,1,float16,float16,0,15.347386678059896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,32,8,128,1,float16,float16,0,27.907333374023438
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,32,1,128,1,float16,float16,0,13.66034189860026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,fp8,0,1.912021319071452
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,32,2,128,1,float16,float16,0,14.093748728434244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,fp8,0,4.3283945719401045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,fp8,0,0.46587733427683514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,32,4,128,1,float16,float16,0,14.707941691080729
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,fp8,0,0.5894399881362915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,32,32,128,1,float16,float16,0,7.055274963378906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,32,1,128,1,float16,float16,0,6.754496256510417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,32,2,128,1,float16,float16,0,5.892053604125977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,fp8,0,0.9972480138142904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,32,8,128,1,float16,float16,0,13.81838353474935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,fp8,0,2.2994772593180337
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,fp8,0,30.73217010498047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,fp8,0,0.08574400345484416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,32,4,128,1,float16,float16,0,6.097866694132487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,fp8,0,0.17570134003957114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,32,32,128,1,float16,float16,0,3.072533289591471
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,32,1,128,1,float16,float16,0,3.0011574427286782
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,fp8,0,0.4566346804300944
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,32,8,128,1,float16,float16,0,5.820549647013347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,fp8,0,1.2547252972920735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,32,2,128,1,float16,float16,0,2.728581428527832
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,fp8,0,17.071792602539062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,32,4,128,1,float16,float16,0,3.0320746103922525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,32,8,128,1,float16,float16,0,3.030789375305176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,fp8,0,6.292261123657227
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,fp8,0,2.928389231363932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,fp8,0,3.4166558583577475
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,fp8,0,4.776399930318196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,fp8,0,10.546991984049479
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,fp8,0,1.4546720186869304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,32,1,128,1,float16,float16,0,15.715632120768229
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,32,32,128,1,float16,float16,0,17.582117716471355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,fp8,0,1.627120018005371
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,fp8,0,2.421781380971273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,32,1,128,1,float16,float16,0,32.24365234375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,32,2,128,1,float16,float16,0,31.722462972005207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,32,2,128,1,float16,float16,0,15.43515141805013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,32,8,128,1,float16,float16,0,34.21867116292318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,32,4,128,1,float16,float16,0,33.91082255045573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,fp8,0,5.488202412923177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,fp8,0,0.7330880165100098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,fp8,0,0.8494079907735189
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,32,4,128,1,float16,float16,0,16.67306137084961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,32,32,128,1,float16,float16,0,8.940736134847006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,fp8,0,1.3213813304901123
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,32,1,128,1,float16,float16,0,7.416869481404622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,fp8,0,35.563402811686196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,32,8,128,1,float16,float16,0,15.712027231852213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,32,2,128,1,float16,float16,0,7.673840204874675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,fp8,0,0.3439679940541585
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,fp8,0,2.622053305308024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,32,4,128,1,float16,float16,0,6.357637405395508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,fp8,0,0.40669333934783936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,32,32,128,1,float16,float16,0,4.042501449584961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,32,1,128,1,float16,float16,0,3.3905067443847656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,fp8,0,0.6011679967244467
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,32,8,128,1,float16,float16,0,7.955322901407878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,32,2,128,1,float16,float16,0,3.2625013987223306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,fp8,0,1.532591978708903
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,fp8,0,0.07109866539637248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,32,4,128,1,float16,float16,0,3.485472043355306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,32,1,128,1,float16,float16,0,1.6997653643290203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,32,32,128,1,float16,float16,0,1.9932640393575032
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,32,8,128,1,float16,float16,0,3.549440066019694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,fp8,0,17.340965270996094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,fp8,0,0.12947733203570047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,fp8,0,0.2786506613095601
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,32,2,128,1,float16,float16,0,1.7301866213480632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,fp8,0,8.040458679199219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,fp8,0,0.7501066525777181
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,32,8,128,1,float16,float16,0,1.6494666735331218
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,32,4,128,1,float16,float16,0,1.64084259668986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,fp8,0,3.960394541422526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,fp8,0,3.5346078872680664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,fp8,0,4.239407857259114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,fp8,0,5.669493357340495
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,fp8,0,10.843488057454428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,fp8,0,1.8018186887105305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,32,1,128,1,float16,float16,0,15.14077885945638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,32,32,128,1,float16,float16,0,16.665611267089844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,fp8,0,2.080608050028483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,32,1,128,1,float16,float16,0,30.430618286132812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,32,2,128,1,float16,float16,0,30.604293823242188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,fp8,0,2.8564319610595703
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,32,2,128,1,float16,float16,0,14.202096303304037
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,32,4,128,1,float16,float16,0,32.03851318359375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,32,8,128,1,float16,float16,0,32.614234924316406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,fp8,0,0.9082667032877604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,fp8,0,5.527333577473958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,32,4,128,1,float16,float16,0,15.017013549804688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,fp8,0,1.0238826274871826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,32,32,128,1,float16,float16,0,8.031744003295898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,32,1,128,1,float16,float16,0,6.685125350952148
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,fp8,0,1.437162717183431
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,fp8,0,34.3001963297526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,32,2,128,1,float16,float16,0,6.734021504720052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,fp8,0,2.892101287841797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,32,32,128,1,float16,float16,0,3.663994789123535
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,32,4,128,1,float16,float16,0,6.880853017171224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,32,8,128,1,float16,float16,0,7.123973210652669
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,fp8,0,0.4378720124562581
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,32,8,128,1,float16,float16,0,15.263248443603516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,fp8,0,0.5352960030237833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,fp8,0,16.165354410807293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,32,1,128,1,float16,float16,0,2.8436158498128257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,fp8,0,0.7507627010345459
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,32,4,128,1,float16,float16,0,2.9283361434936523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,32,2,128,1,float16,float16,0,3.121701240539551
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,fp8,0,1.3867200215657551
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,fp8,0,0.07277866701285045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,32,32,128,1,float16,float16,0,1.810431957244873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,32,8,128,1,float16,float16,0,3.0876213709513345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,fp8,0,0.13079466422398886
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,fp8,0,7.9979095458984375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,32,1,128,1,float16,float16,0,1.470314661661784
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,fp8,0,0.3705386718114217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,32,2,128,1,float16,float16,0,1.4886986414591472
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,32,4,128,1,float16,float16,0,1.4676480293273926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,fp8,0,0.7248799800872803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,32,32,128,1,float16,float16,0,0.9652960300445557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,32,8,128,1,float16,float16,0,1.4957493146260579
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,32,1,128,1,float16,float16,0,0.7679359912872314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,fp8,0,0.041365332901477814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,fp8,0,0.062309334675470986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,fp8,0,0.14275200168291727
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,fp8,0,3.521168073018392
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,32,2,128,1,float16,float16,0,0.750117301940918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,32,4,128,1,float16,float16,0,0.749013344446818
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,fp8,0,0.3418826659520467
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,fp8,0,1.7737119992574055
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,32,8,128,1,float16,float16,0,0.7586773236592611
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,fp8,0,3.0196053187052407
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,fp8,0,3.983978589375814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,fp8,0,2.6528852780659995
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,32,32,128,1,float16,float16,0,10.177274703979492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,32,1,128,1,float16,float16,0,6.604672114054362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,fp8,0,7.2747039794921875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,32,1,128,1,float16,float16,0,17.17420832316081
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,32,4,128,1,float16,float16,0,17.686453501383465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,32,2,128,1,float16,float16,0,19.35851796468099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,fp8,0,1.442853291829427
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,32,8,128,1,float16,float16,0,18.5853754679362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,fp8,0,2.1674826939900718
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,fp8,0,1.5924426714579265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,32,2,128,1,float16,float16,0,8.046698888142904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,fp8,0,3.7423839569091797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,32,4,128,1,float16,float16,0,7.859423955281575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,fp8,0,18.842815399169922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,32,32,128,1,float16,float16,0,4.5394134521484375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,fp8,0,0.6685012976328532
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,32,1,128,1,float16,float16,0,3.2103732426961265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,32,8,128,1,float16,float16,0,8.629114786783854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,fp8,0,0.7528746922810873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,fp8,0,1.040117343266805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,32,2,128,1,float16,float16,0,3.429546674092611
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,fp8,0,1.7674719492594402
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,fp8,0,8.846202850341797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,32,4,128,1,float16,float16,0,3.3355468114217124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,32,32,128,1,float16,float16,0,2.3494505882263184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,fp8,0,0.35230934619903564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,32,8,128,1,float16,float16,0,3.491210619608561
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,fp8,0,0.37415464719136554
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,32,1,128,1,float16,float16,0,1.6312692960103352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,fp8,0,0.5037599802017212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,fp8,0,0.8520906766255697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,32,32,128,1,float16,float16,0,1.1226773262023926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,fp8,0,4.423301378885905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,32,2,128,1,float16,float16,0,1.6584107081095378
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,32,8,128,1,float16,float16,0,1.7389012972513835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,32,4,128,1,float16,float16,0,1.8304053942362468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,fp8,0,0.09486933549245198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,fp8,0,0.049642667174339294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,32,1,128,1,float16,float16,0,0.8793546358744303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,fp8,0,0.17253865798314413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,32,2,128,1,float16,float16,0,0.8465332984924316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,fp8,0,0.032933334509531655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,fp8,0,0.4821973244349162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,32,8,128,1,float16,float16,0,0.8978133201599121
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,32,4,128,1,float16,float16,0,0.9138773282368978
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,32,1,128,1,float16,float16,0,0.4832106828689575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,32,32,128,1,float16,float16,0,0.5762079954147339
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,fp8,0,0.05684266487757365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,32,2,128,1,float16,float16,0,0.4373706579208374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,fp8,0,2.092122713724772
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,32,8,128,1,float16,float16,0,0.4546240170796712
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,fp8,0,0.10803733269373576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,32,4,128,1,float16,float16,0,0.4556906620661418
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,fp8,0,0.2209333380063375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,fp8,0,1.1305546760559082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,fp8,0,3.527957280476888
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,fp8,0,5.177066802978516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,fp8,0,3.941216150919596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,fp8,0,8.273935953776041
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,32,1,128,1,float16,float16,0,7.053951899210612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,fp8,0,1.893770694732666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,32,1,128,1,float16,float16,0,16.992965698242188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,32,4,128,1,float16,float16,0,17.879557291666668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,32,32,128,1,float16,float16,0,9.245269139607748
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,32,8,128,1,float16,float16,0,17.988501230875652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,fp8,0,1.99019193649292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,32,2,128,1,float16,float16,0,16.948309580485027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,fp8,0,2.481541315714518
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,32,2,128,1,float16,float16,0,7.704458872477214
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,fp8,0,4.00328000386556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,32,4,128,1,float16,float16,0,7.981157302856445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,32,32,128,1,float16,float16,0,4.599317232767741
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,32,8,128,1,float16,float16,0,8.094298680623373
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,fp8,0,0.8851306438446045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,fp8,0,20.0721918741862
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,32,1,128,1,float16,float16,0,3.3977225621541343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,fp8,0,1.2341492970784504
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,fp8,0,1.0344639619191487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,fp8,0,9.292218526204428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,fp8,0,2.0301440556844077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,32,4,128,1,float16,float16,0,3.5403467814127603
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,32,2,128,1,float16,float16,0,3.1085332234700522
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,32,8,128,1,float16,float16,0,3.4677759806315103
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,fp8,0,0.42836801211039227
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,32,32,128,1,float16,float16,0,2.4323840141296387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,fp8,0,0.47951467831929523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,32,1,128,1,float16,float16,0,1.7683946291605632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,fp8,0,0.5935680071512858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,32,2,128,1,float16,float16,0,1.971450646718343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,32,4,128,1,float16,float16,0,1.8154826164245605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,fp8,0,0.06101333101590475
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,32,8,128,1,float16,float16,0,1.7090346018473308
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,32,32,128,1,float16,float16,0,1.1679413318634033
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,32,1,128,1,float16,float16,0,0.7911466757456461
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,fp8,0,0.09734400113423665
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,32,4,128,1,float16,float16,0,0.828986644744873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,fp8,0,1.0329439640045166
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,32,8,128,1,float16,float16,0,0.8454346656799316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,fp8,0,4.401717185974121
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,fp8,0,0.2736106713612874
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,fp8,0,0.4580373366673787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,fp8,0,2.0015412966410318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,32,2,128,1,float16,float16,0,0.8072319825490316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,32,32,128,1,float16,float16,0,0.5851840178171793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,32,1,128,1,float16,float16,0,0.3961493174235026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,fp8,0,0.0452159990866979
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,32,4,128,1,float16,float16,0,0.45260266462961835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,32,8,128,1,float16,float16,0,0.4267839988072713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,fp8,0,0.03475199888149897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,32,32,128,1,float16,float16,0,0.24523733059565225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,32,2,128,1,float16,float16,0,0.40799466768900555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,fp8,0,0.08839466174443562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,fp8,0,0.19368533293406168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,32,1,128,1,float16,float16,0,0.22360533475875854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,fp8,0,0.025744001070658367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,fp8,0,0.05355200171470642
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,32,2,128,1,float16,float16,0,0.22442134221394858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,32,4,128,1,float16,float16,0,0.2345973253250122
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,fp8,0,0.032773333291212715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,32,8,128,1,float16,float16,0,0.2304746707280477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,fp8,0,1.0940907001495361
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,fp8,0,0.5040053526560465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,fp8,0,0.11269866426785786
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,fp8,0,2.6432159741719565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,fp8,0,3.7943251927693686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,fp8,0,5.720426559448242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,fp8,0,3.131749471028646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,32,4,128,1,float16,float16,0,10.31438954671224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,32,1,128,1,float16,float16,0,9.631151835123697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,32,8,128,1,float16,float16,0,10.202735900878906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,32,2,128,1,float16,float16,0,9.500741322835287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,32,32,128,1,float16,float16,0,6.006117502848308
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,fp8,0,1.470479965209961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,fp8,0,1.3358240127563477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,32,1,128,1,float16,float16,0,3.5074774424235025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,32,2,128,1,float16,float16,0,3.850613276163737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,32,4,128,1,float16,float16,0,4.211093266805013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,fp8,0,1.9076587359110515
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,fp8,0,0.6645333369572958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,fp8,0,2.758666674296061
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,fp8,0,10.637434641520182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,32,8,128,1,float16,float16,0,4.27181339263916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,32,1,128,1,float16,float16,0,1.9677119255065918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,fp8,0,0.7193600336710612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,32,32,128,1,float16,float16,0,3.0870132446289062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,32,2,128,1,float16,float16,0,1.8295307159423828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,32,4,128,1,float16,float16,0,2.0207573572794595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,fp8,0,0.9443840185801188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,fp8,0,1.361994743347168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,fp8,0,0.31645333766937256
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,32,32,128,1,float16,float16,0,1.533631960550944
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,32,8,128,1,float16,float16,0,2.0770559310913086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,32,1,128,1,float16,float16,0,0.9116906325022379
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,fp8,0,0.33642133076985675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,fp8,0,0.4113653500874837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,32,4,128,1,float16,float16,0,0.9783093134562174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,32,2,128,1,float16,float16,0,0.9736373424530029
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,32,8,128,1,float16,float16,0,1.0689973036448162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,fp8,0,2.622005303700765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,32,32,128,1,float16,float16,0,0.752778689066569
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,fp8,0,0.657589316368103
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,32,1,128,1,float16,float16,0,0.4760799805323283
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,fp8,0,5.352714538574219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,fp8,0,0.06816000243028005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,fp8,0,0.04332800209522247
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,32,4,128,1,float16,float16,0,0.5165493488311768
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,fp8,0,0.11710400382677714
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,32,2,128,1,float16,float16,0,0.5013973315556844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,fp8,0,1.2641759713490803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,32,1,128,1,float16,float16,0,0.24507200717926025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,32,8,128,1,float16,float16,0,0.5661866664886475
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,fp8,0,0.029301332930723827
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,32,2,128,1,float16,float16,0,0.24982933203379312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,fp8,0,0.034634667138258614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,32,32,128,1,float16,float16,0,0.3930026690165202
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,32,4,128,1,float16,float16,0,0.26658666133880615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,fp8,0,0.30163733164469403
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,fp8,0,0.6452320019404093
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,32,8,128,1,float16,float16,0,0.2547573248545329
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,fp8,0,0.06902933120727539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,32,32,128,1,float16,float16,0,0.1550933321317037
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,fp8,0,0.12959466377894083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,fp8,0,0.021802666286627453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,32,2,128,1,float16,float16,0,0.14468266566594443
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,fp8,0,0.041589332123597465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,fp8,0,0.027061333258946735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,32,1,128,1,float16,float16,0,0.144378662109375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,32,4,128,1,float16,float16,0,0.15634133418401083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,32,8,128,1,float16,float16,0,0.14892799655596414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,fp8,0,0.0867039958635966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,fp8,0,0.3256853421529134
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,fp8,0,3.520458539326986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,fp8,0,3.8937387466430664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,32,1,128,1,float16,float16,0,9.14575449625651
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,32,8,128,1,float16,float16,0,10.154458363850912
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,fp8,0,4.71830399831136
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,fp8,0,1.8881707191467285
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,32,4,128,1,float16,float16,0,10.185749053955078
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,fp8,0,6.458431879679362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,32,2,128,1,float16,float16,0,7.953178405761719
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,32,1,128,1,float16,float16,0,3.548624038696289
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,32,2,128,1,float16,float16,0,3.61680539449056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,32,32,128,1,float16,float16,0,6.791541417439778
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,fp8,0,2.0804479916890464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,fp8,0,2.351909319559733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,32,4,128,1,float16,float16,0,3.839557329813639
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,32,1,128,1,float16,float16,0,1.7584586143493652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,fp8,0,3.27128537495931
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,32,8,128,1,float16,float16,0,4.308143933614095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,fp8,0,0.8876000245412191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,fp8,0,11.422757466634115
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,32,32,128,1,float16,float16,0,3.3793598810831704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,32,2,128,1,float16,float16,0,1.7883680661519368
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,fp8,0,0.9603947003682455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,32,32,128,1,float16,float16,0,1.6883254051208496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,fp8,0,5.4289811452229815
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,32,8,128,1,float16,float16,0,2.105642636617025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,fp8,0,1.7157440185546875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,32,1,128,1,float16,float16,0,0.9440266291300455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,fp8,0,1.233573357264201
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,fp8,0,0.4283039967219035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,32,4,128,1,float16,float16,0,1.9434879620869954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,fp8,0,0.458682656288147
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,32,2,128,1,float16,float16,0,0.9299093087514242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,fp8,0,0.5315466721852621
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,fp8,0,0.7846240202585856
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,fp8,0,2.7478132247924805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,32,4,128,1,float16,float16,0,0.9718560377756754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,fp8,0,0.0489279975493749
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,32,1,128,1,float16,float16,0,0.490447998046875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,32,2,128,1,float16,float16,0,0.5036640167236328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,fp8,0,0.09598933657010396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,fp8,0,1.366655985514323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,fp8,0,0.2197386622428894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,fp8,0,0.3385546604792277
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,32,8,128,1,float16,float16,0,0.5682773192723592
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,32,32,128,1,float16,float16,0,0.8300906817118326
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,32,4,128,1,float16,float16,0,0.5046613216400146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,32,8,128,1,float16,float16,0,1.0461546579996746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,32,1,128,1,float16,float16,0,0.22546666860580444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,fp8,0,0.028981332977612812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,32,32,128,1,float16,float16,0,0.430730660756429
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,32,2,128,1,float16,float16,0,0.23788267374038696
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,fp8,0,0.03710933278004328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,fp8,0,0.12889066338539124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,32,4,128,1,float16,float16,0,0.25276799996693927
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,32,8,128,1,float16,float16,0,0.26136000951131183
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,fp8,0,0.6722773710886637
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,32,32,128,1,float16,float16,0,0.1699946721394857
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,fp8,0,0.06584000090758006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,fp8,0,0.02063999945918719
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,fp8,0,0.025962665677070618
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,32,1,128,1,float16,float16,0,0.12688000003496805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,32,4,128,1,float16,float16,0,0.13686399658521017
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,32,2,128,1,float16,float16,0,0.13019200166066489
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,fp8,0,0.03974399964014689
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,32,8,128,1,float16,float16,0,0.13271466890970865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,fp8,0,0.06959466636180878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,32,32,128,1,float16,float16,0,0.0851200024286906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,32,1,128,1,float16,float16,0,0.07894933223724365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,fp8,0,0.29527467489242554
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,fp8,0,0.0746613343556722
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,32,2,128,1,float16,float16,0,0.08014933268229167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,32,4,128,1,float16,float16,0,0.0823520024617513
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,fp8,0,0.07970133423805237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,32,8,128,1,float16,float16,0,0.08265066643555959
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,fp8,0,0.08986666798591614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,fp8,0,0.11526933312416077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,fp8,0,0.23357866207758585
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,fp8,0,3.52620792388916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,32,1,128,1,float16,float16,0,6.222938537597656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,fp8,0,3.912288029988607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,32,2,128,1,float16,float16,0,6.367749532063802
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,fp8,0,4.684639930725098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,32,4,128,1,float16,float16,0,6.904949188232422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,32,32,128,1,float16,float16,0,6.412618637084961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,fp8,0,1.7584266662597656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,32,1,128,1,float16,float16,0,2.208896001180013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,32,2,128,1,float16,float16,0,2.320906639099121
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,32,8,128,1,float16,float16,0,7.76966921488444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,fp8,0,1.9449599583943684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,fp8,0,6.292885462443034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,fp8,0,8.103461583455404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,fp8,0,2.3478239377339682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,32,4,128,1,float16,float16,0,2.7327839533487954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,fp8,0,0.9546453158060709
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,fp8,0,3.1721652348836265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,32,2,128,1,float16,float16,0,1.1629386742909749
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,fp8,0,4.208671887715657
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,32,4,128,1,float16,float16,0,1.348416010538737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,fp8,0,1.026037295659383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,32,1,128,1,float16,float16,0,1.0929333368937175
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,32,8,128,1,float16,float16,0,3.177311897277832
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,32,1,128,1,float16,float16,0,0.5685333410898844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,fp8,0,1.1413386662801106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,32,8,128,1,float16,float16,0,1.555173397064209
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,32,32,128,1,float16,float16,0,1.6221173604329426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,32,2,128,1,float16,float16,0,0.5960586468378702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,fp8,0,1.6557653745015461
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,fp8,0,0.45606398582458496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,fp8,0,2.135690689086914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,32,32,128,1,float16,float16,0,3.2214345932006836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,fp8,0,0.5639253457387289
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,32,4,128,1,float16,float16,0,0.6571413278579712
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,32,8,128,1,float16,float16,0,0.7686453660329183
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,32,32,128,1,float16,float16,0,0.8745866616566976
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,32,1,128,1,float16,float16,0,0.29926933844884235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,fp8,0,0.4275466601053874
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,32,2,128,1,float16,float16,0,0.3136319915453593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,fp8,0,0.0551093320051829
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,fp8,0,0.7814826965332031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,fp8,0,0.07386666536331177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,fp8,0,0.997920036315918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,32,4,128,1,float16,float16,0,0.3417280117670695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,fp8,0,0.22150399287541708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,32,1,128,1,float16,float16,0,0.14308800299962363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,fp8,0,0.282149334748586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,32,8,128,1,float16,float16,0,0.3824373483657837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,32,32,128,1,float16,float16,0,0.39390401045481366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,fp8,0,0.029311999678611755
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,fp8,0,0.03435733417669932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,32,4,128,1,float16,float16,0,0.15662399927775064
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,32,2,128,1,float16,float16,0,0.14459199706713358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,fp8,0,0.05542933444182078
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,32,8,128,1,float16,float16,0,0.17096000909805298
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,32,1,128,1,float16,float16,0,0.07805333534876506
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,32,32,128,1,float16,float16,0,0.11180800199508667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,32,2,128,1,float16,float16,0,0.07919466495513916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,fp8,0,0.02109866589307785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,fp8,0,0.022656001150608063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,32,4,128,1,float16,float16,0,0.08453866839408875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,fp8,0,0.09742933511734009
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,32,8,128,1,float16,float16,0,0.08319466809431712
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,fp8,0,0.03279466678698858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,fp8,0,0.18755199511845908
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,32,32,128,1,float16,float16,0,0.05044800043106079
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,fp8,0,0.07488533357779185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,32,1,128,1,float16,float16,0,0.04275733232498169
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,fp8,0,0.04786133269468943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,32,2,128,1,float16,float16,0,0.04371733466784159
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,fp8,0,0.46515198548634845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,fp8,0,0.16179200013478598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,fp8,0,0.08283199866612752
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,fp8,0,0.07712000111738841
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,32,4,128,1,float16,float16,0,0.04693866769472758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,32,32,128,1,float16,float16,0,0.029669334491093952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,32,1,128,1,float16,float16,0,0.026880001028378803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,32,8,128,1,float16,float16,0,0.04683733483155569
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,fp8,0,0.04474666714668274
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,fp8,0,0.09968533118565877
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,32,2,128,1,float16,float16,0,0.026965332527955372
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,fp8,0,0.049642667174339294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,32,8,128,1,float16,float16,0,0.028042666614055634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,fp8,0,0.04517866671085358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,32,4,128,1,float16,float16,0,0.027850667635599773
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,fp8,0,0.052842666705449425
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,fp8,0,0.0897226631641388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,32,1,128,1,float16,float16,0,2.02402130762736
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,fp8,0,1.8842933972676594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,fp8,0,1.9464693069458008
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,32,2,128,1,float16,float16,0,2.2072854042053223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,32,4,128,1,float16,float16,0,2.5560693740844727
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,32,1,128,1,float16,float16,0,0.8596959908803304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,fp8,0,2.373407999674479
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,32,8,128,1,float16,float16,0,3.155797322591146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,32,32,128,1,float16,float16,0,3.230928103129069
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,fp8,0,3.3724692662556968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,fp8,0,0.8860692977905273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,32,2,128,1,float16,float16,0,0.9484906991322836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,fp8,0,3.5021492640177407
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,fp8,0,0.971951961517334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,fp8,0,1.229413350423177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,32,1,128,1,float16,float16,0,0.4217066764831543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,32,4,128,1,float16,float16,0,1.1136586666107178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,32,32,128,1,float16,float16,0,1.6138399442036946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,32,8,128,1,float16,float16,0,1.4329226811726887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,fp8,0,1.555946667989095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,fp8,0,1.7340000470479329
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,32,2,128,1,float16,float16,0,0.465498685836792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,fp8,0,0.456218679745992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,32,4,128,1,float16,float16,0,0.5400480031967163
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,fp8,0,0.4630773464838664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,32,1,128,1,float16,float16,0,0.21546133359273276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,32,8,128,1,float16,float16,0,0.7109173138936361
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,fp8,0,0.5257173379262289
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,32,32,128,1,float16,float16,0,0.8018399874369303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,fp8,0,0.7688266436258951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,fp8,0,0.07043200234572093
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,32,2,128,1,float16,float16,0,0.23043733835220337
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,32,4,128,1,float16,float16,0,0.26505066951115924
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,fp8,0,0.2153600056966146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,32,32,128,1,float16,float16,0,0.36444799105326336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,fp8,0,0.09222400188446045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,fp8,0,0.7738293011983236
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,32,2,128,1,float16,float16,0,0.10282666484514873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,32,8,128,1,float16,float16,0,0.3646879990895589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,fp8,0,0.028991999725500744
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,32,1,128,1,float16,float16,0,0.09806399544080098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,32,4,128,1,float16,float16,0,0.1120853324731191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,32,8,128,1,float16,float16,0,0.1226026713848114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,fp8,0,0.29846400022506714
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,32,32,128,1,float16,float16,0,0.09515733520189922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,fp8,0,0.36078933874766034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,fp8,0,0.05057600140571594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,fp8,0,0.07344000041484833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,fp8,0,0.03446399917205175
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,fp8,0,0.02091199904680252
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,fp8,0,0.022895999252796173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,32,2,128,1,float16,float16,0,0.0551093320051829
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,fp8,0,0.12727999687194824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,32,4,128,1,float16,float16,0,0.0598826656738917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,32,1,128,1,float16,float16,0,0.053685332338015236
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,32,8,128,1,float16,float16,0,0.06035199761390686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,32,1,128,1,float16,float16,0,0.0305226668715477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,fp8,0,0.030261332790056866
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,fp8,0,0.03811199963092804
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,32,32,128,1,float16,float16,0,0.03685333331425985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,32,2,128,1,float16,float16,0,0.03140799949566523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,fp8,0,0.0758240024248759
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,fp8,0,0.07515199979146321
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,32,4,128,1,float16,float16,0,0.03339199970165888
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,32,8,128,1,float16,float16,0,0.03367999941110611
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,fp8,0,0.08003733555475871
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,32,1,128,1,float16,float16,0,0.019013332823912304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,fp8,0,0.044549331068992615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,fp8,0,0.13205333550771078
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,32,32,128,1,float16,float16,0,0.022357332209746044
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,fp8,0,0.0888853371143341
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,32,4,128,1,float16,float16,0,0.02015999952952067
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,fp8,0,0.04571199913819631
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,32,2,128,1,float16,float16,0,0.019359999646743137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,fp8,0,0.044453332821528115
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,fp8,0,0.04961066444714864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,fp8,0,0.07163199782371521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,32,1,128,1,float16,float16,0,0.012191999703645706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,32,32,128,1,float16,float16,0,0.013594667116800943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,32,8,128,1,float16,float16,0,0.02038399999340375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,fp8,0,0.029333333174387615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,32,2,128,1,float16,float16,0,0.01227733368674914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,fp8,0,0.029311999678611755
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,32,4,128,1,float16,float16,0,0.012341332932313284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,fp8,0,0.029306667546431225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,fp8,0,0.03278400003910065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,32,8,128,1,float16,float16,0,0.01249066616098086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,fp8,0,0.04025600105524063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,32,2,128,1,float16,float16,0,1.0164799690246582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,fp8,0,0.8882719675699869
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,32,1,128,1,float16,float16,0,0.9191892941792806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,fp8,0,0.961178700129191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,32,4,128,1,float16,float16,0,1.1599360307057698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,fp8,0,1.6115199724833171
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,32,1,128,1,float16,float16,0,0.4546773433685303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,32,32,128,1,float16,float16,0,1.6270666122436523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,fp8,0,1.1694347063700359
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,32,8,128,1,float16,float16,0,1.5112214088439941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,fp8,0,1.7159786224365234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,fp8,0,0.4267839988072713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,32,4,128,1,float16,float16,0,0.5501493215560913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,32,2,128,1,float16,float16,0,0.4919999837875366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,32,1,128,1,float16,float16,0,0.206821342309316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,fp8,0,0.7540960311889648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,32,8,128,1,float16,float16,0,0.7314720153808594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,fp8,0,0.4566986560821533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,32,2,128,1,float16,float16,0,0.22006932894388834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,fp8,0,0.7549920082092285
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,fp8,0,0.05329599976539612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,fp8,0,0.5252853234608968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,fp8,0,0.07456533114115398
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,fp8,0,0.19402666886647543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,fp8,0,0.28385066986083984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,32,4,128,1,float16,float16,0,0.2557493249575297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,32,32,128,1,float16,float16,0,0.3627893527348836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,32,32,128,1,float16,float16,0,0.8067786693572998
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,32,1,128,1,float16,float16,0,0.07700266440709432
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,32,8,128,1,float16,float16,0,0.34374932448069256
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,32,2,128,1,float16,float16,0,0.08108800152937572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,fp8,0,0.05020800232887268
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,fp8,0,0.02914133419593175
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,fp8,0,0.30322666962941486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,fp8,0,0.034117333590984344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,32,8,128,1,float16,float16,0,0.09520533680915833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,32,32,128,1,float16,float16,0,0.0710506687561671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,fp8,0,0.06181866427262624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,fp8,0,0.02073066681623459
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,32,4,128,1,float16,float16,0,0.0895306666692098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,fp8,0,0.09798933068911235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,32,2,128,1,float16,float16,0,0.0440533310174942
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,32,4,128,1,float16,float16,0,0.04769066472848257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,fp8,0,0.029824001093705494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,32,8,128,1,float16,float16,0,0.04831466575463613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,fp8,0,0.02277333289384842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,32,32,128,1,float16,float16,0,0.030645333230495453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,fp8,0,0.034671999514102936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,32,1,128,1,float16,float16,0,0.024421334266662598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,fp8,0,0.016602666427691776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,32,1,128,1,float16,float16,0,0.04242133100827535
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,32,4,128,1,float16,float16,0,0.027482666075229645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,fp8,0,0.052069331208864846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,32,8,128,1,float16,float16,0,0.02769600103298823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,32,2,128,1,float16,float16,0,0.025397333006064098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,fp8,0,0.020527999848127365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,fp8,0,0.01749333366751671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,32,32,128,1,float16,float16,0,0.01743999992807706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,32,1,128,1,float16,float16,0,0.014762666076421738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,32,2,128,1,float16,float16,0,0.014752000570297241
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,32,4,128,1,float16,float16,0,0.015552000453074774
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,fp8,0,0.014752000570297241
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,fp8,0,0.014698666830857595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,32,8,128,1,float16,float16,0,0.01575999955336253
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,fp8,0,0.01603200038274129
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,fp8,0,0.021935999393463135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,32,1,128,1,float16,float16,0,0.010010666524370512
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,32,32,128,1,float16,float16,0,0.010890666395425797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,fp8,0,0.01350933313369751
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,fp8,0,0.016469333320856094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,fp8,0,0.029125332832336426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,fp8,0,0.01369599997997284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,fp8,0,0.013914667069911957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,fp8,0,0.01932266727089882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,32,8,128,1,float16,float16,0,0.009952000031868616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,32,2,128,1,float16,float16,0,0.009786666681369146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,32,4,128,1,float16,float16,0,0.010117333382368088
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,32,32,128,1,float16,float16,0,0.009530666594703993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,fp8,0,0.01358933374285698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,32,2,128,1,float16,float16,0,0.008986666798591614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,32,1,128,1,float16,float16,0,0.008954666554927826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,32,4,128,1,float16,float16,0,0.00902399979531765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,32,8,128,1,float16,float16,0,0.008943999807039896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,fp8,0,0.018031999468803406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,fp8,0,0.013248000293970108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,32,1,128,1,float16,float16,0,0.4554026524225871
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,fp8,0,0.42688532670338947
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,fp8,0,0.45423467953999835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,fp8,0,0.5238186518351237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,32,4,128,1,float16,float16,0,0.5550933281580607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,32,2,128,1,float16,float16,0,0.5335679848988851
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,32,1,128,1,float16,float16,0,0.21119999885559082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,32,32,128,1,float16,float16,0,0.8110079765319824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,32,8,128,1,float16,float16,0,0.8029066721598307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,fp8,0,0.049471999208132424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,fp8,0,0.7001492977142334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,32,2,128,1,float16,float16,0,0.21901333332061768
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,fp8,0,0.7361599604288737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,32,4,128,1,float16,float16,0,0.25619200865427655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,fp8,0,0.0751039981842041
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,fp8,0,0.2729439934094747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,fp8,0,0.2595040003458659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,32,32,128,1,float16,float16,0,0.3615093231201172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,32,8,128,1,float16,float16,0,0.3379253149032593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,fp8,0,0.02889599899450938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,32,2,128,1,float16,float16,0,0.07543466488520305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,fp8,0,0.03390933324893316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,32,32,128,1,float16,float16,0,0.05935466786225637
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,fp8,0,0.22028799851735434
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,fp8,0,0.06258133550484975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,32,1,128,1,float16,float16,0,0.0721013347307841
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,32,8,128,1,float16,float16,0,0.0959999958674113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,32,4,128,1,float16,float16,0,0.08340266346931458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,fp8,0,0.05030933519204458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,32,2,128,1,float16,float16,0,0.041562666495641075
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,fp8,0,0.022490667800108593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,fp8,0,0.07285866638024648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,32,4,128,1,float16,float16,0,0.04552533229192098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,32,1,128,1,float16,float16,0,0.039893334110577904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,32,8,128,1,float16,float16,0,0.04573333263397217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,32,32,128,1,float16,float16,0,0.028501334289709728
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,fp8,0,0.03453866640726725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,fp8,0,0.020799999435742695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,32,1,128,1,float16,float16,0,0.02266666789849599
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,fp8,0,0.030069333811601002
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,fp8,0,0.016373333831628162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,32,2,128,1,float16,float16,0,0.023445333043734234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,fp8,0,0.04022400081157684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,32,32,128,1,float16,float16,0,0.016597333053747814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,32,4,128,1,float16,float16,0,0.02533866713444392
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,fp8,0,0.01739199956258138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,fp8,0,0.02186666677395503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,32,8,128,1,float16,float16,0,0.02555199960867564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,fp8,0,0.020608000457286835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,32,1,128,1,float16,float16,0,0.013776000589132309
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,32,2,128,1,float16,float16,0,0.01368533323208491
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,fp8,0,0.014560000350077948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,fp8,0,0.014490666488806406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,32,8,128,1,float16,float16,0,0.01505600040157636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,32,4,128,1,float16,float16,0,0.014783999572197596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,fp8,0,0.01595199977358182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,32,32,128,1,float16,float16,0,0.01055466632048289
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,32,1,128,1,float16,float16,0,0.009370666618148485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,fp8,0,0.01632000009218852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,fp8,0,0.01340266689658165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,32,2,128,1,float16,float16,0,0.009338666374484697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,fp8,0,0.013359999905029932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,fp8,0,0.023018665611743927
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,32,4,128,1,float16,float16,0,0.009535999968647957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,fp8,0,0.015664000064134598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,32,32,128,1,float16,float16,0,0.009253333633144697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,fp8,0,0.013616000612576803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,fp8,0,0.01370666672786077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,32,8,128,1,float16,float16,0,0.009808000177145004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,32,2,128,1,float16,float16,0,0.008517333616813024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,32,1,128,1,float16,float16,0,0.008682666967312494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,32,4,128,1,float16,float16,0,0.008896000062425932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,32,8,128,1,float16,float16,0,0.008703999842206636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,fp8,0,0.013946666071812311
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,32,1,128,1,float16,float16,0,0.008447999755541483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,fp8,0,0.01251199965675672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,32,2,128,1,float16,float16,0,0.008389333263039589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,32,32,128,1,float16,float16,0,0.008703999842206636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,fp8,0,0.013306666165590286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,32,4,128,1,float16,float16,0,0.00855466661353906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,fp8,0,0.012757333616415659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,fp8,0,0.013503999759753546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,32,8,128,1,float16,float16,0,0.008549333239595095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,fp8,0,0.08376000324885051
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,32,2,128,1,float16,float16,0,0.27685866753260296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,32,1,128,1,float16,float16,0,0.2740959922472636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,fp8,0,0.23470399777094522
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,fp8,0,0.14773333072662354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,32,4,128,1,float16,float16,0,0.3213919997215271
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,32,8,128,1,float16,float16,0,0.37005865573883057
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,fp8,0,0.03316266586383184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,fp8,0,0.35445332527160645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,32,1,128,1,float16,float16,0,0.12290666500727336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,fp8,0,0.3185173273086548
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,32,2,128,1,float16,float16,0,0.12653332948684692
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,32,32,128,1,float16,float16,0,0.360698660214742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,fp8,0,0.03965333352486292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,fp8,0,0.06529066463311513
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,32,4,128,1,float16,float16,0,0.13423466682434082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,32,32,128,1,float16,float16,0,0.08854933579762776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,fp8,0,0.0900266667207082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,32,8,128,1,float16,float16,0,0.14442666371663412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,fp8,0,0.024693332612514496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,32,2,128,1,float16,float16,0,0.0663679987192154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,fp8,0,0.1279039978981018
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,32,1,128,1,float16,float16,0,0.06486933430035909
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,fp8,0,0.02666666607062022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,32,8,128,1,float16,float16,0,0.0707946668068568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,32,32,128,1,float16,float16,0,0.042090664307276406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,fp8,0,0.03505066782236099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,32,1,128,1,float16,float16,0,0.03602666656176249
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,32,4,128,1,float16,float16,0,0.07008533179759979
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,fp8,0,0.04937600096066793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,fp8,0,0.019925333559513092
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,32,2,128,1,float16,float16,0,0.03673599908749262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,32,4,128,1,float16,float16,0,0.03876800090074539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,fp8,0,0.024671999116738636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,fp8,0,0.026799999177455902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,32,32,128,1,float16,float16,0,0.02312533309062322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,32,8,128,1,float16,float16,0,0.03879466652870178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,fp8,0,0.021514666577180225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,fp8,0,0.06857066849867503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,32,1,128,1,float16,float16,0,0.020495999604463577
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,fp8,0,0.037178667883078255
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,32,2,128,1,float16,float16,0,0.02041600023706754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,fp8,0,0.018976000448067982
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,fp8,0,0.01966933285196622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,32,4,128,1,float16,float16,0,0.021551998953024547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,fp8,0,0.018090666582187016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,fp8,0,0.020421333611011505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,32,8,128,1,float16,float16,0,0.02141333371400833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,32,32,128,1,float16,float16,0,0.013818666338920593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,32,1,128,1,float16,float16,0,0.012517333030700684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,fp8,0,0.017845333864291508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,32,2,128,1,float16,float16,0,0.012495999534924826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,fp8,0,0.017893332988023758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,fp8,0,0.020581333587567013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,32,4,128,1,float16,float16,0,0.012661332885424295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,32,8,128,1,float16,float16,0,0.012757333616415659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,fp8,0,0.017397332936525345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,32,1,128,1,float16,float16,0,0.00860799973209699
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,32,32,128,1,float16,float16,0,0.009045333291093508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,fp8,0,0.01754666616519292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,32,4,128,1,float16,float16,0,0.008570666735370954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,32,2,128,1,float16,float16,0,0.008650666723648706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,fp8,0,0.01736533393462499
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,fp8,0,0.01747200017174085
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,32,8,128,1,float16,float16,0,0.008725333337982496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,fp8,0,0.018133333573738735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,fp8,0,0.01757866640885671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,32,32,128,1,float16,float16,0,0.00847999999920527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,fp8,0,0.016666666915019352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,32,2,128,1,float16,float16,0,0.00816000004609426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,32,1,128,1,float16,float16,0,0.008176000167926153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,fp8,0,0.017194667210181553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,32,8,128,1,float16,float16,0,0.008330666770537695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,32,4,128,1,float16,float16,0,0.008277333031098047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,fp8,0,0.017616000026464462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,32,1,128,1,float16,float16,0,0.007983999947706858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,fp8,0,0.01730666682124138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,32,32,128,1,float16,float16,0,0.008357333640257517
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,fp8,0,0.016224000602960587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,32,2,128,1,float16,float16,0,0.007983999947706858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,32,4,128,1,float16,float16,0,0.008101333553592363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,fp8,0,0.016666666915019352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,32,8,128,1,float16,float16,0,0.008192000289758047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,fp8,0,0.017162666966517765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,fp8,0,0.01736533393462499
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,fp8,0,0.034901333351929985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,32,1,128,1,float16,float16,0,0.22563733657201132
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,fp8,0,0.090938667456309
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,fp8,0,0.05235200126965841
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,32,2,128,1,float16,float16,0,0.2300586700439453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,32,8,128,1,float16,float16,0,0.2493279973665873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,32,4,128,1,float16,float16,0,0.2380639910697937
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,32,1,128,1,float16,float16,0,0.11529067158699036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,32,32,128,1,float16,float16,0,0.14492799838383993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,fp8,0,0.2018453280131022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,fp8,0,0.02497066557407379
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,fp8,0,0.14307733376820883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,32,2,128,1,float16,float16,0,0.11707199613253276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,fp8,0,0.028533334533373516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,fp8,0,0.047925333182017006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,32,32,128,1,float16,float16,0,0.06656533479690552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,32,4,128,1,float16,float16,0,0.12111999591191609
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,32,1,128,1,float16,float16,0,0.060218666990598045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,fp8,0,0.10549333691596985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,32,8,128,1,float16,float16,0,0.12097066640853882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,32,2,128,1,float16,float16,0,0.06118933359781901
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,fp8,0,0.02004266654451688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,32,4,128,1,float16,float16,0,0.0631466656923294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,fp8,0,0.07206933200359344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,fp8,0,0.026202666262785595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,fp8,0,0.02163733293612798
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,32,32,128,1,float16,float16,0,0.03682666768630346
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,32,1,128,1,float16,float16,0,0.03425599883000056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,fp8,0,0.01801066721479098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,32,8,128,1,float16,float16,0,0.06312533219655354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,fp8,0,0.05843733251094818
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,fp8,0,0.040207999447981514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,fp8,0,0.018079999834299088
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,32,4,128,1,float16,float16,0,0.03524799893299738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,32,2,128,1,float16,float16,0,0.03398933261632919
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,32,8,128,1,float16,float16,0,0.0349440003434817
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,fp8,0,0.02015999952952067
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,32,32,128,1,float16,float16,0,0.02080533280968666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,32,1,128,1,float16,float16,0,0.0194560003777345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,32,4,128,1,float16,float16,0,0.019744000087181728
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,fp8,0,0.02218666672706604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,fp8,0,0.018005333840847015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,32,2,128,1,float16,float16,0,0.01937066639463107
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,32,8,128,1,float16,float16,0,0.019653332730134327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,fp8,0,0.01724799970785777
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,32,1,128,1,float16,float16,0,0.011952000359694162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,fp8,0,0.017317333569129307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,fp8,0,0.017952000101407368
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,fp8,0,0.03236266722281774
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,32,32,128,1,float16,float16,0,0.012437333663304647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,32,2,128,1,float16,float16,0,0.01192533348997434
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,fp8,0,0.017360000560681026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,32,4,128,1,float16,float16,0,0.012074666718641916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,fp8,0,0.01659199967980385
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,32,8,128,1,float16,float16,0,0.012133333832025528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,32,32,128,1,float16,float16,0,0.008485333373149237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,fp8,0,0.01932266727089882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,fp8,0,0.016976000120242436
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,32,1,128,1,float16,float16,0,0.008266666904091835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,fp8,0,0.01773333301146825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,32,2,128,1,float16,float16,0,0.008240000034372011
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,32,8,128,1,float16,float16,0,0.008325333396593729
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,32,32,128,1,float16,float16,0,0.008186666915814081
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,32,4,128,1,float16,float16,0,0.008352000266313553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,fp8,0,0.01643199970324834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,32,2,128,1,float16,float16,0,0.007887999837597212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,fp8,0,0.016965333372354507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,32,1,128,1,float16,float16,0,0.007994666695594788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,32,4,128,1,float16,float16,0,0.00808533343176047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,32,8,128,1,float16,float16,0,0.007994666695594788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,fp8,0,0.01699200024207433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,fp8,0,0.016517333686351776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,fp8,0,0.017184000462293625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,32,1,128,1,float16,float16,0,0.007946666950980822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,fp8,0,0.01918399954835574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,32,32,128,1,float16,float16,0,0.008383999889095625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,32,2,128,1,float16,float16,0,0.007871999715765318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,fp8,0,0.01674666628241539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,32,4,128,1,float16,float16,0,0.007893333211541176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,fp8,0,0.01699200024207433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,32,8,128,1,float16,float16,0,0.007978666573762894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,fp8,0,0.01752000053723653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,32,1,128,1,float16,float16,0,0.013914667069911957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,32,1,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,32,2,128,1,float16,float16,0,0.027104000250498455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,0,0.023333333432674408
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,32,4,128,1,float16,float16,0,0.04111466556787491
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,0,0.03491200009981791
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,32,8,128,1,float16,float16,0,0.06563733518123627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,32,32,128,1,float16,float16,0,0.1051573355992635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,32,1,128,1,float16,float16,0,0.009733333562811216
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,32,1,128,1,float16,fp8,0,0.01358933374285698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,32,2,128,1,float16,float16,0,0.01807466646035512
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,0,0.05425600210825602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,fp8,0,0.09196266531944275
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,0,0.015722667177518208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,32,4,128,1,float16,float16,0,0.025034666061401367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,0,0.021557333568731945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,32,1,128,1,float16,float16,0,0.009408000235756239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,32,8,128,1,float16,float16,0,0.03880000114440918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,32,1,128,1,float16,fp8,0,0.012639999389648438
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,32,32,128,1,float16,float16,0,0.055973331133524575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,32,2,128,1,float16,float16,0,0.01340266689658165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,fp8,0,0.05212266743183136
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,0,0.032986665765444435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,32,4,128,1,float16,float16,0,0.01714666684468587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,32,32,128,1,float16,float16,0,0.031850665807724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,0,0.014730667074521383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,32,1,128,1,float16,float16,0,0.00914666677514712
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,32,8,128,1,float16,float16,0,0.024154665569464367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,32,1,128,1,float16,fp8,0,0.011909333368142446
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,32,2,128,1,float16,float16,0,0.012815999488035837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,0,0.020693333198626835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,0,0.012005332857370377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,fp8,0,0.032373333970705666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,32,4,128,1,float16,float16,0,0.0129120002190272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,32,32,128,1,float16,float16,0,0.01838933303952217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,0,0.011994666109482447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,32,1,128,1,float16,float16,0,0.009008000294367472
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,32,8,128,1,float16,float16,0,0.016538667182127636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,0,0.014175999909639359
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,32,4,128,1,float16,float16,0,0.012853333105643591
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,32,1,128,1,float16,fp8,0,0.011402666568756104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,0,0.011434666812419891
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,32,2,128,1,float16,float16,0,0.012965332716703415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,0,0.011621333658695221
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,32,8,128,1,float16,float16,0,0.01293333371480306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,fp8,0,0.021013334393501282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,0,0.01381333296497663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,32,1,128,1,float16,float16,0,0.008858666444818178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,32,1,128,1,float16,fp8,0,0.011343999455372492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,32,2,128,1,float16,float16,0,0.01240533341964086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,32,32,128,1,float16,float16,0,0.011541333049535751
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,32,4,128,1,float16,float16,0,0.012506666282812754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,0,0.011359999577204386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,0,0.011530666301647821
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,32,8,128,1,float16,float16,0,0.012661332885424295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,32,32,128,1,float16,float16,0,0.008112000301480293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,0,0.013418667018413544
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,32,2,128,1,float16,float16,0,0.012085333466529846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,fp8,0,0.015722667177518208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,32,1,128,1,float16,float16,0,0.00873066671192646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,32,1,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,32,4,128,1,float16,float16,0,0.012080000092585882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,0,0.011168000598748526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,0,0.011215999722480774
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,32,8,128,1,float16,float16,0,0.01219733307758967
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,32,1,128,1,float16,float16,0,0.008736000085870424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,32,32,128,1,float16,float16,0,0.007946666950980822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,fp8,0,0.01524266724785169
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,32,1,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,32,2,128,1,float16,float16,0,0.012096000214417776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,32,4,128,1,float16,float16,0,0.012069333344697952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,32,8,128,1,float16,float16,0,0.012191999703645706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,fp8,0,0.014592000593741735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,32,1,128,1,float16,float16,0,0.00854399986565113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,32,32,128,1,float16,float16,0,0.007962666451931
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,0,0.011429333438475927
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,32,1,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,32,2,128,1,float16,float16,0,0.008629333227872849
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,32,4,128,1,float16,float16,0,0.008618666479984919
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,32,8,128,1,float16,float16,0,0.008629333227872849
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,0,0.010954666882753372
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,0,0.011370666325092316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,fp8,0,3.2281386057535806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,fp8,0,5.190671920776367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,fp8,0,11.117844899495443
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,fp8,0,40.49765268961588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,fp8,0,1.643839995066325
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,24,24,128,1,float16,float16,0,41.98651631673177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,fp8,0,2.57696533203125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,24,1,128,1,float16,float16,0,38.71112569173177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,fp8,0,5.750682830810547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,24,4,128,1,float16,float16,0,80.84109497070312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,24,1,128,1,float16,float16,0,81.66859436035156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,24,2,128,1,float16,float16,0,83.03983052571614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,24,8,128,1,float16,float16,0,82.0034688313802
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,24,2,128,1,float16,float16,0,40.94744618733724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,fp8,0,0.8536053498586019
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,24,4,128,1,float16,float16,0,42.14964803059896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,fp8,0,1.4809759457906086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,fp8,0,89.00349934895833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,24,8,128,1,float16,float16,0,40.95636749267578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,fp8,0,20.845957438151043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,24,1,128,1,float16,float16,0,20.998485565185547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,24,24,128,1,float16,float16,0,22.547279357910156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,fp8,0,3.463839848836263
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,24,2,128,1,float16,float16,0,19.663733164469402
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,fp8,0,0.44569599628448486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,fp8,0,9.933184305826822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,24,24,128,1,float16,float16,0,10.053269068400065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,fp8,0,0.7004746596018473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,fp8,0,1.493183930714925
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,24,1,128,1,float16,float16,0,10.468602498372396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,24,4,128,1,float16,float16,0,19.98438898722331
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,24,2,128,1,float16,float16,0,9.683205286661783
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,24,8,128,1,float16,float16,0,19.654287974039715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,fp8,0,2.3165440559387207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,fp8,0,4.900208155314128
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,24,4,128,1,float16,float16,0,9.815706888834635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,fp8,0,21.3024164835612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,24,8,128,1,float16,float16,0,9.79922103881836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,fp8,0,43.683258056640625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,fp8,0,3.2890345255533853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,fp8,0,6.896490732828776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,fp8,0,23.546368916829426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,24,24,128,1,float16,float16,0,24.33678944905599
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,fp8,0,1.189738670984904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,fp8,0,1.6592319806416829
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,24,1,128,1,float16,float16,0,22.4879887898763
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,fp8,0,3.5619678497314453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,24,1,128,1,float16,float16,0,46.945465087890625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,24,2,128,1,float16,float16,0,45.2789052327474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,24,4,128,1,float16,float16,0,45.841776529947914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,24,8,128,1,float16,float16,0,47.94762674967448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,24,2,128,1,float16,float16,0,23.86529541015625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,fp8,0,0.6391253471374512
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,fp8,0,12.847503662109375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,fp8,0,49.66404724121094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,24,4,128,1,float16,float16,0,23.751007080078125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,fp8,0,0.9304266770680746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,24,24,128,1,float16,float16,0,12.133983612060547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,fp8,0,1.9757013320922852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,24,1,128,1,float16,float16,0,11.787867228190104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,24,8,128,1,float16,float16,0,23.393206278483074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,24,24,128,1,float16,float16,0,5.633973439534505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,fp8,0,5.493146896362305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,24,2,128,1,float16,float16,0,11.910837809244791
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,fp8,0,0.2058453361193339
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,24,4,128,1,float16,float16,0,12.073322296142578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,fp8,0,0.5363466739654541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,24,1,128,1,float16,float16,0,4.767104148864746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,24,8,128,1,float16,float16,0,12.094645182291666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,fp8,0,1.0810773372650146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,24,4,128,1,float16,float16,0,5.014560063680013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,24,2,128,1,float16,float16,0,4.838298797607422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,fp8,0,1.8896907170613606
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,fp8,0,2.7956533432006836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,24,8,128,1,float16,float16,0,5.194224039713542
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,fp8,0,26.41527557373047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,fp8,0,11.999871571858725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,fp8,0,2.5989813804626465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,fp8,0,5.126506805419922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,fp8,0,16.97020212809245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,24,24,128,1,float16,float16,0,16.69163767496745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,fp8,0,0.9725759824117025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,fp8,0,1.3242080211639404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,24,1,128,1,float16,float16,0,32.43883768717448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,24,2,128,1,float16,float16,0,31.84820302327474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,24,4,128,1,float16,float16,0,32.722869873046875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,fp8,0,2.755920092264811
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,24,8,128,1,float16,float16,0,33.03924814860026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,24,1,128,1,float16,float16,0,15.6004638671875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,24,2,128,1,float16,float16,0,16.28063456217448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,fp8,0,0.5159733295440674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,24,4,128,1,float16,float16,0,16.026111602783203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,24,24,128,1,float16,float16,0,8.449125289916992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,fp8,0,8.150741577148438
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,fp8,0,0.7218613624572754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,fp8,0,35.75201161702474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,fp8,0,1.4738933245340984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,24,1,128,1,float16,float16,0,7.302794774373372
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,24,2,128,1,float16,float16,0,7.099376042683919
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,24,24,128,1,float16,float16,0,3.5793066024780273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,24,8,128,1,float16,float16,0,16.153172810872395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,fp8,0,0.11505599816640218
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,fp8,0,3.9862772623697915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,24,8,128,1,float16,float16,0,7.748154958089192
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,fp8,0,0.2731093366940816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,24,4,128,1,float16,float16,0,7.730181376139323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,24,1,128,1,float16,float16,0,3.1577653884887695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,fp8,0,0.7893813451131185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,24,2,128,1,float16,float16,0,3.4704373677571616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,fp8,0,17.39684804280599
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,24,4,128,1,float16,float16,0,3.192138671875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,24,8,128,1,float16,float16,0,3.2156480153401694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,fp8,0,2.239898681640625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,fp8,0,2.952335993448893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,fp8,0,8.454992294311523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,fp8,0,3.9112106959025064
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,fp8,0,7.226064046223958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,fp8,0,21.618497212727863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,24,24,128,1,float16,float16,0,22.34723154703776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,fp8,0,1.489232063293457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,24,1,128,1,float16,float16,0,20.912347157796223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,fp8,0,1.9735093116760254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,fp8,0,3.697200139363607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,24,1,128,1,float16,float16,0,43.344757080078125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,24,2,128,1,float16,float16,0,43.4629872639974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,24,4,128,1,float16,float16,0,45.00318908691406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,24,8,128,1,float16,float16,0,44.79894510904948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,24,2,128,1,float16,float16,0,22.728505452473957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,fp8,0,0.753701368967692
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,24,4,128,1,float16,float16,0,21.063621520996094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,fp8,0,46.98156229654948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,fp8,0,1.0838879744211833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,fp8,0,10.801034291585287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,24,24,128,1,float16,float16,0,11.424320220947266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,fp8,0,1.8423573176066081
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,24,8,128,1,float16,float16,0,20.96177037556966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,24,1,128,1,float16,float16,0,10.9378293355306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,24,2,128,1,float16,float16,0,9.773359934488932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,fp8,0,0.38334401448567706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,24,24,128,1,float16,float16,0,4.961738586425781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,fp8,0,5.374207814534505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,24,1,128,1,float16,float16,0,4.6080319086710615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,fp8,0,0.55731201171875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,24,4,128,1,float16,float16,0,11.334309895833334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,fp8,0,1.1159839630126953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,24,8,128,1,float16,float16,0,10.430554707845053
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,24,2,128,1,float16,float16,0,4.7671254475911455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,24,4,128,1,float16,float16,0,4.700794537862142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,24,24,128,1,float16,float16,0,2.360309282938639
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,24,8,128,1,float16,float16,0,4.647066752115886
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,fp8,0,0.0974026620388031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,fp8,0,2.7753492991129556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,fp8,0,10.832693735758463
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,fp8,0,0.22562134265899658
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,fp8,0,0.4821813503901164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,fp8,0,23.612447102864582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,24,1,128,1,float16,float16,0,2.5308106740315757
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,24,2,128,1,float16,float16,0,2.082911968231201
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,24,4,128,1,float16,float16,0,2.211967945098877
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,fp8,0,1.4800267219543457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,24,8,128,1,float16,float16,0,2.2211039861043296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,fp8,0,4.984127998352051
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,fp8,0,2.1738826433817544
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,fp8,0,2.8960746129353843
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,fp8,0,4.756239891052246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,fp8,0,13.042346954345703
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,24,24,128,1,float16,float16,0,13.142064412434896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,24,1,128,1,float16,float16,0,12.223344167073568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,fp8,0,1.143999973932902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,fp8,0,1.411333401997884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,24,1,128,1,float16,float16,0,25.583697001139324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,fp8,0,2.4034825960795083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,24,2,128,1,float16,float16,0,25.18341318766276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,24,4,128,1,float16,float16,0,25.295130411783855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,24,8,128,1,float16,float16,0,25.037081400553387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,24,2,128,1,float16,float16,0,11.869829813639322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,24,4,128,1,float16,float16,0,11.72534434000651
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,fp8,0,6.262485504150391
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,24,24,128,1,float16,float16,0,5.957541147867839
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,fp8,0,0.5586933294932047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,24,1,128,1,float16,float16,0,5.600677490234375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,fp8,0,27.145151774088543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,fp8,0,0.7356800238291422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,fp8,0,1.2985866864522297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,24,8,128,1,float16,float16,0,12.637551625569662
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,24,2,128,1,float16,float16,0,5.544399897257487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,fp8,0,3.0316108067830405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,24,4,128,1,float16,float16,0,5.021482785542806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,24,24,128,1,float16,float16,0,2.8213173548380532
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,fp8,0,0.16057599584261575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,fp8,0,0.3763519922892253
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,fp8,0,13.239093780517578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,24,8,128,1,float16,float16,0,5.558624267578125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,24,1,128,1,float16,float16,0,2.5920373598734536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,fp8,0,0.6395999987920126
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,24,2,128,1,float16,float16,0,2.526282628377279
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,24,4,128,1,float16,float16,0,2.3616906801859536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,24,24,128,1,float16,float16,0,1.499776045481364
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,fp8,0,1.7035679817199707
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,24,8,128,1,float16,float16,0,2.7073920567830405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,fp8,0,0.06993066767851512
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,fp8,0,0.13173866271972656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,24,1,128,1,float16,float16,0,1.2874346574147542
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,24,2,128,1,float16,float16,0,1.2818186283111572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,fp8,0,0.31852267185846966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,24,4,128,1,float16,float16,0,1.4960427284240723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,fp8,0,6.31216557820638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,fp8,0,2.89572811126709
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,fp8,0,0.7928000291188558
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,24,8,128,1,float16,float16,0,1.2349546750386555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,fp8,0,2.8278185526529946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,fp8,0,3.4346132278442383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,fp8,0,5.4109757741292315
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,24,24,128,1,float16,float16,0,12.127674102783203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,24,1,128,1,float16,float16,0,10.87375513712565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,fp8,0,12.421045939127604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,fp8,0,1.5012906392415364
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,24,1,128,1,float16,float16,0,22.87689717610677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,24,2,128,1,float16,float16,0,23.884966532389324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,24,4,128,1,float16,float16,0,23.13091786702474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,24,8,128,1,float16,float16,0,24.03912099202474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,fp8,0,1.81660795211792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,fp8,0,2.6626453399658203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,24,2,128,1,float16,float16,0,10.511119842529297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,fp8,0,6.2054398854573565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,fp8,0,25.548128763834637
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,24,1,128,1,float16,float16,0,4.6239573160807295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,fp8,0,0.7174346446990967
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,24,24,128,1,float16,float16,0,6.183504104614258
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,24,4,128,1,float16,float16,0,11.109973907470703
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,fp8,0,0.9136319955190023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,fp8,0,1.3471466700236003
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,24,8,128,1,float16,float16,0,11.858404795328775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,24,2,128,1,float16,float16,0,4.761055946350098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,fp8,0,0.35278932253519696
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,24,24,128,1,float16,float16,0,2.7617759704589844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,24,4,128,1,float16,float16,0,5.006640116373698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,fp8,0,11.60366948445638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,fp8,0,3.2152748107910156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,24,1,128,1,float16,float16,0,2.3341493606567383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,24,8,128,1,float16,float16,0,5.440858840942383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,fp8,0,0.43856533368428546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,fp8,0,0.6935146649678549
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,24,2,128,1,float16,float16,0,2.1370132764180503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,24,4,128,1,float16,float16,0,2.5358559290568032
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,24,8,128,1,float16,float16,0,2.3555946350097656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,fp8,0,1.5149013201395671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,fp8,0,0.06159466505050659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,24,24,128,1,float16,float16,0,1.514618714650472
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,fp8,0,5.575344085693359
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,fp8,0,0.12242133418718974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,fp8,0,0.2879199981689453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,24,2,128,1,float16,float16,0,1.2098933060963948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,24,4,128,1,float16,float16,0,1.1490879853566487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,24,1,128,1,float16,float16,0,1.1109120051066081
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,24,24,128,1,float16,float16,0,0.6978453000386556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,24,8,128,1,float16,float16,0,1.2202666600545247
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,fp8,0,0.046207999189694725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,fp8,0,0.8284266789754232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,fp8,0,2.730746587117513
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,24,1,128,1,float16,float16,0,0.6069386800130209
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,fp8,0,0.06870399912198384
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,fp8,0,0.16332266728083292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,24,4,128,1,float16,float16,0,0.6099199851353964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,24,2,128,1,float16,float16,0,0.5744693279266357
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,fp8,0,1.367802619934082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,24,8,128,1,float16,float16,0,0.5811466773351034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,fp8,0,0.4562079906463623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,fp8,0,2.480053265889486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,fp8,0,3.693493207295736
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,fp8,0,2.203258673350016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,fp8,0,7.755546569824219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,24,24,128,1,float16,float16,0,7.232432047526042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,24,2,128,1,float16,float16,0,13.634080251057943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,24,1,128,1,float16,float16,0,13.414746602376303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,fp8,0,1.0484693050384521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,24,1,128,1,float16,float16,0,4.898576100667317
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,24,4,128,1,float16,float16,0,13.508837381998697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,24,8,128,1,float16,float16,0,13.667072296142578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,fp8,0,1.2452853520711262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,fp8,0,2.050362745920817
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,24,2,128,1,float16,float16,0,5.431088129679362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,24,24,128,1,float16,float16,0,3.3656800587972007
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,fp8,0,3.7738987604777017
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,24,4,128,1,float16,float16,0,5.83571179707845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,fp8,0,0.5066560109456381
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,fp8,0,14.531354268391928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,24,8,128,1,float16,float16,0,6.021039962768555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,fp8,0,0.6404906511306763
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,fp8,0,0.9241546789805094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,24,2,128,1,float16,float16,0,2.4443146387736
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,24,1,128,1,float16,float16,0,2.638549327850342
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,24,4,128,1,float16,float16,0,2.5664587020874023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,fp8,0,1.903813362121582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,fp8,0,0.13748266299565634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,fp8,0,6.837973276774089
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,24,24,128,1,float16,float16,0,1.7372320493062336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,24,8,128,1,float16,float16,0,2.845168113708496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,24,1,128,1,float16,float16,0,1.2806986967722576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,fp8,0,0.25409066677093506
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,24,2,128,1,float16,float16,0,1.2579893271128337
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,fp8,0,0.44806400934855145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,24,4,128,1,float16,float16,0,1.4446825981140137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,24,24,128,1,float16,float16,0,0.8347733020782471
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,24,1,128,1,float16,float16,0,0.6275893449783325
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,24,8,128,1,float16,float16,0,1.5787146886189778
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,fp8,0,0.9142399628957113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,fp8,0,3.130666732788086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,fp8,0,0.08355733752250671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,fp8,0,0.18675732612609863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,24,2,128,1,float16,float16,0,0.6311466693878174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,fp8,0,0.04901866614818573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,fp8,0,1.5377492904663086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,24,4,128,1,float16,float16,0,0.6843626499176025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,24,24,128,1,float16,float16,0,0.415887991587321
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,fp8,0,0.43170666694641113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,fp8,0,0.03702933341264725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,24,1,128,1,float16,float16,0,0.34989333152770996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,24,8,128,1,float16,float16,0,0.6985653241475424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,fp8,0,0.0513919989267985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,24,2,128,1,float16,float16,0,0.36345068613688153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,24,4,128,1,float16,float16,0,0.3547946612040202
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,fp8,0,0.1373546620210012
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,fp8,0,0.7669866879781088
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,24,8,128,1,float16,float16,0,0.357589324315389
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,fp8,0,0.2739199995994568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,fp8,0,2.744746526082357
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,fp8,0,4.3899946212768555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,fp8,0,3.174778620402018
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,fp8,0,8.032480239868164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,24,4,128,1,float16,float16,0,13.224207560221354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,24,1,128,1,float16,float16,0,5.15663464864095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,24,2,128,1,float16,float16,0,11.63489023844401
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,24,8,128,1,float16,float16,0,13.923333485921225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,fp8,0,1.4648213386535645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,24,24,128,1,float16,float16,0,7.207674662272136
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,24,1,128,1,float16,float16,0,12.4127197265625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,fp8,0,1.607807954152425
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,fp8,0,2.3092212677001953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,24,2,128,1,float16,float16,0,5.810496012369792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,fp8,0,4.274160067240397
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,fp8,0,14.105386098225912
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,24,4,128,1,float16,float16,0,4.8760426839192705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,24,1,128,1,float16,float16,0,2.3007465998331704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,24,8,128,1,float16,float16,0,5.242778778076172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,fp8,0,0.6842719713846842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,fp8,0,0.8241120179494222
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,24,24,128,1,float16,float16,0,3.6054293314615884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,fp8,0,1.0981600284576416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,24,4,128,1,float16,float16,0,2.6040639877319336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,fp8,0,6.269093195597331
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,24,2,128,1,float16,float16,0,2.349562644958496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,fp8,0,2.0615199406941733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,24,8,128,1,float16,float16,0,2.6351359685262046
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,fp8,0,0.3475786844889323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,24,1,128,1,float16,float16,0,1.1534773508707683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,fp8,0,0.40079466501871747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,24,2,128,1,float16,float16,0,1.2411466439565022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,24,4,128,1,float16,float16,0,1.2484320004781086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,fp8,0,0.5257546504338583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,24,24,128,1,float16,float16,0,1.7758666674296062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,fp8,0,0.051738664507865906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,24,1,128,1,float16,float16,0,0.6229866743087769
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,24,8,128,1,float16,float16,0,1.3366187413533528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,fp8,0,3.1848106384277344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,24,24,128,1,float16,float16,0,0.8717599709828695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,fp8,0,1.0036853154500325
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,fp8,0,0.07674133280913036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,fp8,0,1.4870667457580566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,24,2,128,1,float16,float16,0,0.6525599956512451
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,24,4,128,1,float16,float16,0,0.662992000579834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,fp8,0,0.17721066872278848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,fp8,0,0.4996906518936157
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,24,24,128,1,float16,float16,0,0.4617439905802409
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,24,1,128,1,float16,float16,0,0.3314560055732727
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,fp8,0,0.032474666833877563
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,fp8,0,0.04807466765244802
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,24,2,128,1,float16,float16,0,0.31414933999379474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,24,8,128,1,float16,float16,0,0.6898187001546224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,24,24,128,1,float16,float16,0,0.18496533234914145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,24,4,128,1,float16,float16,0,0.3330399990081787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,24,8,128,1,float16,float16,0,0.32758933305740356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,fp8,0,0.09783466657002766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,fp8,0,0.2392639915148417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,24,1,128,1,float16,float16,0,0.1773759921391805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,fp8,0,0.02696000039577484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,fp8,0,0.8163306713104248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,fp8,0,0.03713600089152654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,24,2,128,1,float16,float16,0,0.1825760006904602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,fp8,0,0.40677332878112793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,24,4,128,1,float16,float16,0,0.18759999672571817
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,24,8,128,1,float16,float16,0,0.18372799952824911
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,fp8,0,0.061205332477887474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,fp8,0,0.1413386662801107
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,fp8,0,3.1150665283203125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,fp8,0,2.219637393951416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,fp8,0,2.3340160051981607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,24,4,128,1,float16,float16,0,7.3642934163411455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,24,1,128,1,float16,float16,0,6.438559850056966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,24,8,128,1,float16,float16,0,7.101600011189778
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,24,2,128,1,float16,float16,0,7.118703842163086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,fp8,0,1.0411732991536458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,fp8,0,5.4597117106119795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,24,24,128,1,float16,float16,0,4.572197278340657
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,24,1,128,1,float16,float16,0,2.9967308044433594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,fp8,0,1.173535982767741
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,24,2,128,1,float16,float16,0,2.897823969523112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,fp8,0,1.559648036956787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,24,4,128,1,float16,float16,0,2.9529972076416016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,fp8,0,0.5012640158335367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,24,8,128,1,float16,float16,0,3.2922452290852866
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,fp8,0,2.725754737854004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,24,24,128,1,float16,float16,0,2.386799971262614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,24,2,128,1,float16,float16,0,1.384021282196045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,fp8,0,0.5569279988606771
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,fp8,0,0.7501333554585775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,24,1,128,1,float16,float16,0,1.3873440424601238
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,24,4,128,1,float16,float16,0,1.7687253952026367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,24,8,128,1,float16,float16,0,1.7580265998840332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,24,24,128,1,float16,float16,0,1.162384033203125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,fp8,0,1.3515040079752605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,24,1,128,1,float16,float16,0,0.7071786721547445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,fp8,0,0.15618133544921875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,fp8,0,7.960842768351237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,fp8,0,0.24083733558654785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,fp8,0,3.872432072957357
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,fp8,0,1.968784014383952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,24,4,128,1,float16,float16,0,0.7563520272572836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,fp8,0,0.34536532560984295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,24,2,128,1,float16,float16,0,0.779317299524943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,24,8,128,1,float16,float16,0,0.8222666581471761
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,24,1,128,1,float16,float16,0,0.3813333511352539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,fp8,0,0.6600533326466879
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,24,24,128,1,float16,float16,0,0.566266655921936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,fp8,0,0.03669333209594091
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,24,2,128,1,float16,float16,0,0.37675734361012775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,fp8,0,0.12893866499265036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,24,4,128,1,float16,float16,0,0.39344000816345215
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,fp8,0,0.057999998331069946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,fp8,0,0.2718613346417745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,24,24,128,1,float16,float16,0,0.27215466896692914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,24,8,128,1,float16,float16,0,0.4527839819590251
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,24,1,128,1,float16,float16,0,0.19850132862726846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,fp8,0,0.026426665484905243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,fp8,0,0.9211680094401041
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,fp8,0,0.03549333413441976
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,24,2,128,1,float16,float16,0,0.20466667413711548
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,24,4,128,1,float16,float16,0,0.20408533016840616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,fp8,0,0.07986133297284444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,24,8,128,1,float16,float16,0,0.2055520017941793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,fp8,0,0.15396266182263693
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,fp8,0,0.42643733819325763
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,24,1,128,1,float16,float16,0,0.1109333336353302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,fp8,0,0.03011200080315272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,24,4,128,1,float16,float16,0,0.1153546671072642
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,fp8,0,0.26708799600601196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,24,24,128,1,float16,float16,0,0.11851732929547627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,24,2,128,1,float16,float16,0,0.11664000153541565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,fp8,0,0.020432000358899433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,24,8,128,1,float16,float16,0,0.11571733156840007
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,fp8,0,0.04712533454100291
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,fp8,0,0.11044800281524658
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,fp8,0,2.7450027465820312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,24,1,128,1,float16,float16,0,5.376122792561849
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,fp8,0,6.236602783203125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,24,8,128,1,float16,float16,0,7.111024220784505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,fp8,0,3.10918394724528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,24,2,128,1,float16,float16,0,6.57033093770345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,fp8,0,3.9989547729492188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,fp8,0,1.4642826716105144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,24,1,128,1,float16,float16,0,2.6129706700642905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,24,24,128,1,float16,float16,0,5.029024124145508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,24,2,128,1,float16,float16,0,2.7262293497721353
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,fp8,0,1.5690560340881348
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,24,4,128,1,float16,float16,0,3.052303949991862
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,24,8,128,1,float16,float16,0,3.377706527709961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,fp8,0,3.178922653198242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,24,24,128,1,float16,float16,0,2.545530637105306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,24,4,128,1,float16,float16,0,6.823242823282878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,fp8,0,1.981536070505778
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,fp8,0,8.110783894856771
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,fp8,0,0.6749493281046549
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,24,1,128,1,float16,float16,0,1.3167093594868977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,24,2,128,1,float16,float16,0,1.3877545992533367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,24,4,128,1,float16,float16,0,1.5225013097127278
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,24,8,128,1,float16,float16,0,1.681338628133138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,fp8,0,1.0158452987670898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,fp8,0,1.595157305399577
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,fp8,0,4.295968055725098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,24,1,128,1,float16,float16,0,0.6785653432210287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,fp8,0,0.7596426804860433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,24,2,128,1,float16,float16,0,0.7593920230865479
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,fp8,0,0.37142932415008545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,fp8,0,0.42905600865681964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,24,24,128,1,float16,float16,0,1.2971466382344563
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,24,4,128,1,float16,float16,0,0.7820373376210531
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,fp8,0,0.34776532649993896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,24,8,128,1,float16,float16,0,0.8844693501790365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,fp8,0,0.04506133496761322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,24,1,128,1,float16,float16,0,0.3708426554997762
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,fp8,0,0.0629013329744339
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,24,2,128,1,float16,float16,0,0.39693331718444824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,24,24,128,1,float16,float16,0,0.6225599845250448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,fp8,0,0.7751680215199789
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,fp8,0,1.0256746610005696
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,24,4,128,1,float16,float16,0,0.40860267480214435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,fp8,0,2.1762293179829917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,fp8,0,0.1490239997704824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,24,8,128,1,float16,float16,0,0.44488000869750977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,fp8,0,0.32764800389607746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,24,1,128,1,float16,float16,0,0.1763040026028951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,fp8,0,0.029317334294319153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,24,24,128,1,float16,float16,0,0.32606399059295654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,fp8,0,0.4946506818135579
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,24,4,128,1,float16,float16,0,0.19155200322469076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,fp8,0,0.06972266733646393
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,fp8,0,0.0365226666132609
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,24,8,128,1,float16,float16,0,0.19488000869750977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,24,2,128,1,float16,float16,0,0.1811573306719462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,24,24,128,1,float16,float16,0,0.10550399621327718
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,24,1,128,1,float16,float16,0,0.09895466764767964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,fp8,0,0.15001066525777182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,fp8,0,0.02214933435122172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,fp8,0,0.027664000789324444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,fp8,0,0.043925335009892784
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,24,2,128,1,float16,float16,0,0.1020906666914622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,24,8,128,1,float16,float16,0,0.10567999879519145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,fp8,0,0.2405760089556376
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,24,4,128,1,float16,float16,0,0.10549867153167725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,fp8,0,0.0904319981733958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,24,24,128,1,float16,float16,0,0.07110933462778728
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,24,1,128,1,float16,float16,0,0.058490668733914696
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,24,2,128,1,float16,float16,0,0.06200533111890157
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,fp8,0,0.06234133243560791
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,24,4,128,1,float16,float16,0,0.062234664956728615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,fp8,0,0.166703999042511
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,fp8,0,0.06709866722424825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,24,8,128,1,float16,float16,0,0.06980800131956737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,fp8,0,0.08163199822107951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,fp8,0,0.10203733046849568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,fp8,0,2.747077306111654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,24,1,128,1,float16,float16,0,4.697786649068196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,fp8,0,3.118250528971354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,24,2,128,1,float16,float16,0,4.876778602600098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,fp8,0,3.9718453089396157
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,24,1,128,1,float16,float16,0,1.6643199920654297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,fp8,0,1.3953919410705566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,24,4,128,1,float16,float16,0,5.424805323282878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,24,24,128,1,float16,float16,0,4.8294986089070635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,fp8,0,6.176165262858073
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,fp8,0,1.5529227256774902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,fp8,0,5.700218836466472
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,24,8,128,1,float16,float16,0,6.238090515136719
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,24,2,128,1,float16,float16,0,1.8150720596313477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,24,4,128,1,float16,float16,0,2.111728032430013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,fp8,0,2.002506732940674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,24,24,128,1,float16,float16,0,2.4141066869099936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,fp8,0,0.6802133719126383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,24,8,128,1,float16,float16,0,2.8065385818481445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,24,2,128,1,float16,float16,0,0.9002453486124674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,fp8,0,0.7646240393320719
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,fp8,0,3.0210558573404946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,24,4,128,1,float16,float16,0,1.0637333393096924
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,fp8,0,0.9388746420542399
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,24,24,128,1,float16,float16,0,1.2139306863149006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,fp8,0,0.3216426571210225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,24,8,128,1,float16,fp8,0,1.3974612553914387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,24,1,128,1,float16,float16,0,0.4355786641438802
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,24,2,128,1,float16,float16,0,0.4725866715113322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,fp8,0,0.3386773268381755
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,24,8,128,1,float16,float16,0,1.2947626908620198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,fp8,0,0.4138880173365275
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,24,4,128,1,float16,float16,0,0.5278506676355997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,24,1,128,1,float16,float16,0,0.8329546451568604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,fp8,0,0.6145439942677816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,fp8,0,0.04121066629886627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,24,8,128,1,float16,float16,0,0.645301342010498
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,fp8,0,0.6903253396352133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,24,1,128,1,float16,float16,0,0.22450133164723715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,24,24,128,1,float16,float16,0,0.59225066502889
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,24,2,128,1,float16,float16,0,0.2461493412653605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,24,4,128,1,float16,float16,0,0.27265600363413495
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,fp8,0,0.10538666447003682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,fp8,0,3.1529385248819985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,fp8,0,0.05592533449331919
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,fp8,0,1.545471986134847
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,24,24,128,1,float16,float16,0,0.2397973338762919
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,24,8,128,1,float16,float16,0,0.2996480067571004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,fp8,0,0.2519306739171346
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,fp8,0,0.025920001169045765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,24,1,128,1,float16,float16,0,0.11348799864451091
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,fp8,0,0.03345600018898646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,24,2,128,1,float16,float16,0,0.11476799845695496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,fp8,0,0.3384213447570801
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,24,4,128,1,float16,float16,0,0.12059733271598816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,fp8,0,0.053871999184290566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,24,8,128,1,float16,float16,0,0.12229866782824199
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,24,24,128,1,float16,float16,0,0.07019733389218648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,fp8,0,0.019333332777023315
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,24,2,128,1,float16,float16,0,0.06527466575304668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,24,1,128,1,float16,float16,0,0.0642133355140686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,24,4,128,1,float16,float16,0,0.06855999926726024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,fp8,0,0.024138666689395905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,24,8,128,1,float16,float16,0,0.07070933282375336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,fp8,0,0.11105066537857056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,fp8,0,0.03478399912516276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,24,24,128,1,float16,float16,0,0.03920533259709676
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,fp8,0,0.05397866666316986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,fp8,0,0.1518400013446808
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,24,1,128,1,float16,float16,0,0.03442133218050003
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,fp8,0,0.06382399797439575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,fp8,0,0.12314132849375407
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,24,2,128,1,float16,float16,0,0.035605333745479584
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,24,8,128,1,float16,float16,0,0.03777066618204117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,24,4,128,1,float16,float16,0,0.037765334049860634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,24,24,128,1,float16,float16,0,0.026170666019121807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,fp8,0,0.059936001896858215
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,fp8,0,0.08442667126655579
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,fp8,0,0.07136000196139018
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,24,1,128,1,float16,float16,0,0.02349333216746648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,24,2,128,1,float16,float16,0,0.02382933348417282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,fp8,0,0.03730666637420654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,24,4,128,1,float16,float16,0,0.02492800106604894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,fp8,0,0.04058133314053217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,24,8,128,1,float16,float16,0,0.02495466669400533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,fp8,0,0.04240000247955322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,fp8,0,0.04833599925041199
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,fp8,0,0.069733331600825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,fp8,0,1.385482629140218
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,fp8,0,1.559450626373291
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,24,1,128,1,float16,float16,0,1.4219679832458496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,24,2,128,1,float16,float16,0,1.676597277323405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,fp8,0,2.011850674947103
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,24,1,128,1,float16,float16,0,0.6511733531951904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,24,4,128,1,float16,float16,0,2.046010653177897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,24,24,128,1,float16,float16,0,2.4164427121480307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,fp8,0,0.6763466993967692
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,24,2,128,1,float16,float16,0,0.7357920010884603
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,fp8,0,2.55621337890625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,fp8,0,0.7582026322682699
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,24,4,128,1,float16,float16,0,0.9063306649525961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,fp8,0,2.873258590698242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,fp8,0,0.9533920288085938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,24,24,128,1,float16,float16,0,1.218448003133138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,24,8,128,1,float16,float16,0,2.6422932942708335
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,24,1,128,1,float16,float16,0,0.323472003142039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,24,8,128,1,float16,float16,0,1.3260640303293865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,24,2,128,1,float16,float16,0,0.3659626642862956
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,fp8,0,1.4069652557373047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,fp8,0,0.37250133355458576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,fp8,0,1.2928906281789143
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,fp8,0,0.40807998180389404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,24,1,128,1,float16,float16,0,0.16396266222000122
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,24,8,128,1,float16,float16,0,0.6041066646575928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,fp8,0,0.041093334555625916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,fp8,0,0.6169919967651367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,fp8,0,0.56113068262736
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,24,2,128,1,float16,float16,0,0.18053332964579263
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,24,4,128,1,float16,float16,0,0.21196266015370688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,24,24,128,1,float16,float16,0,0.5874240001042684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,24,4,128,1,float16,float16,0,0.44047999382019043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,fp8,0,0.09568533301353455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,fp8,0,0.05285866558551788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,24,24,128,1,float16,float16,0,0.22271466255187988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,fp8,0,0.3181706666946411
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,fp8,0,0.02566933383544286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,fp8,0,0.2566933234532674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,fp8,0,0.23704000314076742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,24,8,128,1,float16,float16,0,0.283135990301768
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,24,2,128,1,float16,float16,0,0.08076266447703044
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,24,1,128,1,float16,float16,0,0.07747200131416321
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,fp8,0,0.04953599969546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,fp8,0,0.03102933367093404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,24,4,128,1,float16,float16,0,0.08922132849693298
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,fp8,0,0.08169066905975342
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,24,2,128,1,float16,float16,0,0.04380266865094503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,fp8,0,0.019253333409627277
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,24,8,128,1,float16,float16,0,0.09085866808891296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,24,1,128,1,float16,float16,0,0.04236799975236257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,fp8,0,0.021007999777793884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,24,4,128,1,float16,float16,0,0.047781333327293396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,fp8,0,0.030741333961486816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,24,24,128,1,float16,float16,0,0.029306667546431225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,fp8,0,0.10634133219718933
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,24,8,128,1,float16,float16,0,0.048165331284205117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,24,24,128,1,float16,float16,0,0.05046933392683665
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,24,1,128,1,float16,float16,0,0.02470933397610982
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,fp8,0,0.04161600023508072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,fp8,0,0.05972266693909963
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,24,2,128,1,float16,float16,0,0.025631998976071674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,fp8,0,0.10243200262387593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,24,4,128,1,float16,float16,0,0.02749866743882497
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,fp8,0,0.06677866478761037
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,fp8,0,0.06060799956321716
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,fp8,0,0.037077332536379494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,24,8,128,1,float16,float16,0,0.027957332630952198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,24,1,128,1,float16,float16,0,0.01672533278663953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,24,2,128,1,float16,float16,0,0.016805333395799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,fp8,0,0.07375999788443248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,24,24,128,1,float16,float16,0,0.01926400015751521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,fp8,0,0.0367999995748202
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,fp8,0,0.05871999760468801
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,24,8,128,1,float16,float16,0,0.017866666118303936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,fp8,0,0.041946664452552795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,24,4,128,1,float16,float16,0,0.01802666609485944
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,24,24,128,1,float16,float16,0,0.012714666624863943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,fp8,0,0.042634665966033936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,24,1,128,1,float16,float16,0,0.011792000383138657
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,24,2,128,1,float16,float16,0,0.011920000116030375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,24,4,128,1,float16,float16,0,0.012058666596810022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,fp8,0,0.03626133253177007
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,fp8,0,0.026362667481104534
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,fp8,0,0.02603733291228612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,24,8,128,1,float16,float16,0,0.012213333199421564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,fp8,0,0.029370665550231934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,fp8,0,0.029824001093705494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,fp8,0,0.6799519856770834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,24,2,128,1,float16,float16,0,0.7829439640045166
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,fp8,0,0.7533066272735596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,24,1,128,1,float16,float16,0,0.6882987022399902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,fp8,0,0.9693866570790609
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,24,4,128,1,float16,float16,0,0.945898691813151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,24,8,128,1,float16,float16,0,1.2691946824391682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,24,1,128,1,float16,float16,0,0.33315734068552655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,24,24,128,1,float16,float16,0,1.2257973353068035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,fp8,0,1.2623626391092937
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,fp8,0,1.4181866645812988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,fp8,0,0.40695468584696454
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,24,2,128,1,float16,float16,0,0.37306666374206543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,fp8,0,0.32330665985743207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,fp8,0,0.34373335043589276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,24,4,128,1,float16,float16,0,0.44414401054382324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,fp8,0,0.04107200105985006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,fp8,0,0.47141865889231366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,24,1,128,1,float16,float16,0,0.1450826625029246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,24,2,128,1,float16,float16,0,0.16059199968973795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,24,24,128,1,float16,float16,0,0.5983733336130778
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,fp8,0,0.05337599913279215
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,24,8,128,1,float16,float16,0,0.6235146522521973
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,24,4,128,1,float16,float16,0,0.19427732626597086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,fp8,0,0.6277546485265096
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,24,8,128,1,float16,float16,0,0.26478399833043414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,fp8,0,0.21472533543904623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,fp8,0,0.025909334421157837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,fp8,0,0.2187946637471517
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,fp8,0,0.04663466910521189
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,24,24,128,1,float16,float16,0,0.21945599714914957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,fp8,0,0.030810666580994923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,24,4,128,1,float16,float16,0,0.07161599894364674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,24,1,128,1,float16,float16,0,0.06030400097370148
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,fp8,0,0.06776533524195354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,24,1,128,1,float16,float16,0,0.03418133407831192
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,fp8,0,0.10525332887967427
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,24,24,128,1,float16,float16,0,0.04251199960708618
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,fp8,0,0.019066666563351948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,24,2,128,1,float16,float16,0,0.03586666782697042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,24,8,128,1,float16,float16,0,0.07387199997901917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,fp8,0,0.08461333314577739
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,fp8,0,0.02091199904680252
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,24,2,128,1,float16,float16,0,0.06435200075308482
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,24,8,128,1,float16,float16,0,0.04010133445262909
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,fp8,0,0.028410665690898895
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,fp8,0,0.037903999288876854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,24,1,128,1,float16,float16,0,0.020058666666348774
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,24,2,128,1,float16,float16,0,0.02086399992307027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,fp8,0,0.015717333803574245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,fp8,0,0.016565332810084026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,24,24,128,1,float16,float16,0,0.02478933334350586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,24,4,128,1,float16,float16,0,0.02306666721900304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,fp8,0,0.01961600035429001
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,24,4,128,1,float16,float16,0,0.03968533376852671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,24,8,128,1,float16,float16,0,0.02316266546646754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,24,24,128,1,float16,float16,0,0.016117333124081295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,24,1,128,1,float16,float16,0,0.014074667046467463
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,fp8,0,0.014117332796255747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,fp8,0,0.023760000864664715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,fp8,0,0.04327466587225596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,24,4,128,1,float16,float16,0,0.015050667027632395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,fp8,0,0.014384000251690546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,fp8,0,0.015386667102575302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,24,8,128,1,float16,float16,0,0.015098666151364645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,24,2,128,1,float16,float16,0,0.014032000054915747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,fp8,0,0.027477333943049114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,24,1,128,1,float16,float16,0,0.009546666716535887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,24,2,128,1,float16,float16,0,0.009349333122372627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,fp8,0,0.01950399950146675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,24,4,128,1,float16,float16,0,0.009653333574533463
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,24,8,128,1,float16,float16,0,0.009722666814923286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,fp8,0,0.018757333358128864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,fp8,0,0.013541333377361298
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,fp8,0,0.016778666526079178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,24,24,128,1,float16,float16,0,0.009093333035707474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,24,24,128,1,float16,float16,0,0.010304000228643417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,fp8,0,0.013327999661366144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,24,1,128,1,float16,float16,0,0.008789333204428354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,24,2,128,1,float16,float16,0,0.008832000195980072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,24,8,128,1,float16,float16,0,0.008986666798591614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,fp8,0,0.01754133279124896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,24,4,128,1,float16,float16,0,0.008879999940594038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,fp8,0,0.013130666067202887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,fp8,0,0.016501333564519882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,24,1,128,1,float16,float16,0,0.3388853470484416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,fp8,0,0.3271626631418864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,24,2,128,1,float16,float16,0,0.38632531960805255
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,fp8,0,0.3485706647237142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,24,8,128,1,float16,float16,0,0.6290560166041056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,24,4,128,1,float16,float16,0,0.44407467047373456
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,fp8,0,0.4102773269017537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,24,1,128,1,float16,float16,0,0.13209066788355509
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,24,24,128,1,float16,float16,0,0.5914133389790853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,fp8,0,0.47046931584676105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,fp8,0,0.04171200096607208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,fp8,0,0.6315946578979492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,fp8,0,0.08755733569463094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,24,4,128,1,float16,float16,0,0.19235734144846597
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,24,2,128,1,float16,float16,0,0.15718400478363037
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,24,24,128,1,float16,float16,0,0.21535466114679971
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,fp8,0,0.05232533315817515
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,fp8,0,0.21278933684031168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,24,1,128,1,float16,float16,0,0.05620799958705902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,fp8,0,0.1827146609624227
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,24,8,128,1,float16,float16,0,0.284496009349823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,24,2,128,1,float16,float16,0,0.06012799839178721
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,fp8,0,0.02601066728432973
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,fp8,0,0.04693866769472758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,24,8,128,1,float16,float16,0,0.0681333343187968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,24,4,128,1,float16,float16,0,0.06739733119805653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,24,24,128,1,float16,float16,0,0.04022400081157684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,24,1,128,1,float16,float16,0,0.031888000667095184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,fp8,0,0.019082666685183842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,fp8,0,0.0310506671667099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,24,2,128,1,float16,float16,0,0.03364799916744232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,fp8,0,0.021082667013009388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,fp8,0,0.028330666323502857
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,fp8,0,0.059808000922203064
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,24,24,128,1,float16,float16,0,0.02295999974012375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,24,8,128,1,float16,float16,0,0.037765334049860634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,24,4,128,1,float16,float16,0,0.03755199909210205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,24,1,128,1,float16,float16,0,0.018506667266289394
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,fp8,0,0.03295466552178065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,fp8,0,0.05861333509286245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,24,2,128,1,float16,float16,0,0.01921066641807556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,fp8,0,0.016528000434239704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,fp8,0,0.036389333506425224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,24,24,128,1,float16,float16,0,0.015589332828919092
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,fp8,0,0.01960533360640208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,24,4,128,1,float16,float16,0,0.021488000949223835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,24,8,128,1,float16,float16,0,0.021520001192887623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,fp8,0,0.015696000307798386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,fp8,0,0.021082667013009388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,fp8,0,0.013994666437307993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,24,2,128,1,float16,float16,0,0.013471999516089758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,24,4,128,1,float16,float16,0,0.014357333381970724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,fp8,0,0.01434133326013883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,fp8,0,0.015482666591803232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,24,8,128,1,float16,float16,0,0.014533333480358124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,24,1,128,1,float16,float16,0,0.013514666507641474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,24,24,128,1,float16,float16,0,0.009930666536092758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,fp8,0,0.02107200026512146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,fp8,0,0.015957333147525787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,24,1,128,1,float16,float16,0,0.009039999917149544
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,24,2,128,1,float16,float16,0,0.00903466654320558
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,fp8,0,0.01328533391157786
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,24,8,128,1,float16,float16,0,0.009322666873534521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,24,4,128,1,float16,float16,0,0.009450666606426239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,fp8,0,0.013541333377361298
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,24,24,128,1,float16,float16,0,0.008933333059151968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,fp8,0,0.014848000059525171
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,24,1,128,1,float16,float16,0,0.008661333471536636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,24,2,128,1,float16,float16,0,0.008602666358153025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,fp8,0,0.013514666507641474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,24,4,128,1,float16,float16,0,0.00873066671192646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,fp8,0,0.013701333353916803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,24,24,128,1,float16,float16,0,0.008682666967312494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,24,8,128,1,float16,float16,0,0.008767999708652496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,24,1,128,1,float16,float16,0,0.008325333396593729
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,fp8,0,0.013301332791646322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,fp8,0,0.013343999783198038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,fp8,0,0.012400000045696894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,fp8,0,0.012554666648308435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,24,4,128,1,float16,float16,0,0.008378666515151659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,24,2,128,1,float16,float16,0,0.008383999889095625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,24,8,128,1,float16,float16,0,0.00854399986565113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,24,1,128,1,float16,float16,0,0.1990506649017334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,fp8,0,0.14844266573588052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,24,4,128,1,float16,float16,0,0.2456159989039103
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,fp8,0,0.07123733560244243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,fp8,0,0.04613866905371348
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,24,2,128,1,float16,float16,0,0.21335466702779135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,24,8,128,1,float16,float16,0,0.28668800989786786
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,24,24,128,1,float16,float16,0,0.23330666621526083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,24,1,128,1,float16,float16,0,0.09445333480834961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,fp8,0,0.2643306652704875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,fp8,0,0.029951999584833782
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,24,2,128,1,float16,float16,0,0.0985599954922994
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,fp8,0,0.2755039930343628
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,24,4,128,1,float16,float16,0,0.10589333375295003
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,24,24,128,1,float16,float16,0,0.05819733440876007
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,fp8,0,0.06158933540185293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,24,8,128,1,float16,float16,0,0.10728533069292705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,fp8,0,0.08872532844543457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,fp8,0,0.03668266783157984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,fp8,0,0.0229120006163915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,24,2,128,1,float16,float16,0,0.051498666405677795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,fp8,0,0.0249439999461174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,24,1,128,1,float16,float16,0,0.050016000866889954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,24,4,128,1,float16,float16,0,0.055215999484062195
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,24,24,128,1,float16,float16,0,0.03385066737731298
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,fp8,0,0.0981226662794749
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,24,8,128,1,float16,float16,0,0.05602133274078369
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,fp8,0,0.047397335370381675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,fp8,0,0.019215999792019527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,fp8,0,0.05356800059477488
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,fp8,0,0.03385599950949351
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,24,1,128,1,float16,float16,0,0.028602667152881622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,24,2,128,1,float16,float16,0,0.029359998802344005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,24,4,128,1,float16,float16,0,0.031530665854612984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,fp8,0,0.0236160010099411
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,24,8,128,1,float16,float16,0,0.03136533250411352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,fp8,0,0.020474666108687718
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,24,2,128,1,float16,float16,0,0.016778666526079178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,fp8,0,0.017781333376963932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,fp8,0,0.026101333399613697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,24,24,128,1,float16,float16,0,0.018965333700180054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,24,1,128,1,float16,float16,0,0.016607999801635742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,fp8,0,0.018624000251293182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,fp8,0,0.03526400029659271
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,fp8,0,0.019317333896954853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,24,4,128,1,float16,float16,0,0.017840000490347546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,24,8,128,1,float16,float16,0,0.01788266624013583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,24,1,128,1,float16,float16,0,0.012426666915416718
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,fp8,0,0.020096000283956528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,24,24,128,1,float16,float16,0,0.013173333058754602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,24,2,128,1,float16,float16,0,0.012223999947309494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,24,4,128,1,float16,float16,0,0.01258133351802826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,fp8,0,0.01777600000301997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,24,8,128,1,float16,float16,0,0.012624000509579977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,fp8,0,0.017637333522240322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,24,24,128,1,float16,float16,0,0.008645333349704742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,fp8,0,0.019738666713237762
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,24,1,128,1,float16,float16,0,0.008416000132759413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,24,2,128,1,float16,float16,0,0.008421333506703377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,24,4,128,1,float16,float16,0,0.00843733362853527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,fp8,0,0.017263999829689663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,fp8,0,0.017317333569129307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,fp8,0,0.01730666682124138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,fp8,0,0.01788266624013583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,24,8,128,1,float16,float16,0,0.008496000121037165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,24,24,128,1,float16,float16,0,0.008394666636983553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,fp8,0,0.016976000120242436
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,24,1,128,1,float16,float16,0,0.008058666562040647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,24,2,128,1,float16,float16,0,0.00814933329820633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,fp8,0,0.01752000053723653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,24,4,128,1,float16,float16,0,0.008165333420038223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,24,1,128,1,float16,float16,0,0.0081386665503184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,24,8,128,1,float16,float16,0,0.008256000156203905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,fp8,0,0.01651200031240781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,24,2,128,1,float16,float16,0,0.008154666672150293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,24,24,128,1,float16,float16,0,0.008176000167926153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,24,4,128,1,float16,float16,0,0.008143999924262365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,fp8,0,0.016901332885026932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,24,8,128,1,float16,float16,0,0.008074666683872541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,24,1,128,1,float16,float16,0,0.17196265856424967
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,24,2,128,1,float16,float16,0,0.17543999354044595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,fp8,0,0.04900266726811727
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,24,4,128,1,float16,float16,0,0.1834826668103536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,fp8,0,0.03206400076548258
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,fp8,0,0.08713066577911377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,24,8,128,1,float16,float16,0,0.18425599733988443
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,24,24,128,1,float16,float16,0,0.0969493289788564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,24,1,128,1,float16,float16,0,0.08847999572753906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,fp8,0,0.15358933806419373
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,fp8,0,0.023546665906906128
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,24,2,128,1,float16,float16,0,0.09040533502896626
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,fp8,0,0.12634133299191794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,fp8,0,0.02734400083621343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,fp8,0,0.04664533336957296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,24,4,128,1,float16,float16,0,0.09430399537086487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,24,8,128,1,float16,float16,0,0.09408000111579895
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,24,24,128,1,float16,float16,0,0.05102399984995524
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,24,1,128,1,float16,float16,0,0.04634666442871094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,fp8,0,0.019285333653291065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,fp8,0,0.06961599985758464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,fp8,0,0.020736000190178554
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,24,2,128,1,float16,float16,0,0.04726399978001913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,fp8,0,0.08293866614500682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,24,8,128,1,float16,float16,0,0.04916800061861674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,fp8,0,0.025573333104451496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,24,24,128,1,float16,float16,0,0.029098667204380035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,24,4,128,1,float16,float16,0,0.04913066824277242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,fp8,0,0.03806933263937632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,24,2,128,1,float16,float16,0,0.027024000883102417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,fp8,0,0.017727999637524288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,24,4,128,1,float16,float16,0,0.027999999622503918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,fp8,0,0.017866666118303936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,24,1,128,1,float16,float16,0,0.027098665634791057
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,fp8,0,0.01961600035429001
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,24,8,128,1,float16,float16,0,0.028101332485675812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,fp8,0,0.046112000942230225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,24,24,128,1,float16,float16,0,0.016757333030303318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,fp8,0,0.021562665700912476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,24,2,128,1,float16,float16,0,0.01591466615597407
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,fp8,0,0.031210665901501972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,fp8,0,0.017818666994571686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,24,1,128,1,float16,float16,0,0.016000000139077503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,fp8,0,0.016810666769742966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,24,8,128,1,float16,float16,0,0.016224000602960587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,24,4,128,1,float16,float16,0,0.016085332880417507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,24,24,128,1,float16,float16,0,0.012026666353146235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,24,1,128,1,float16,float16,0,0.011834666132926941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,fp8,0,0.017290666699409485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,24,2,128,1,float16,float16,0,0.011893333246310553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,24,4,128,1,float16,float16,0,0.011989332735538483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,fp8,0,0.017551999539136887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,fp8,0,0.01741333305835724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,fp8,0,0.01657066618402799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,24,24,128,1,float16,float16,0,0.008325333396593729
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,fp8,0,0.016688000410795212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,24,8,128,1,float16,float16,0,0.011989332735538483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,fp8,0,0.018800000349680584
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,24,2,128,1,float16,float16,0,0.008074666683872541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,24,1,128,1,float16,float16,0,0.008122666428486506
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,fp8,0,0.017103999853134155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,24,4,128,1,float16,float16,0,0.00808533343176047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,fp8,0,0.01747200017174085
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,24,1,128,1,float16,float16,0,0.008010666817426682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,fp8,0,0.01637866720557213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,fp8,0,0.016575999557971954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,24,24,128,1,float16,float16,0,0.008229333286484083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,fp8,0,0.016949333250522614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,24,8,128,1,float16,float16,0,0.008256000156203905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,24,2,128,1,float16,float16,0,0.008000000069538752
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,24,8,128,1,float16,float16,0,0.008021333565314611
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,24,4,128,1,float16,float16,0,0.007978666573762894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,24,24,128,1,float16,float16,0,0.008042666440208754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,fp8,0,0.016927999754746754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,fp8,0,0.016208000481128693
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,24,1,128,1,float16,float16,0,0.007967999825874964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,fp8,0,0.01695999999841054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,fp8,0,0.016901332885026932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,24,4,128,1,float16,float16,0,0.007983999947706858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,24,2,128,1,float16,float16,0,0.007893333211541176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,24,8,128,1,float16,float16,0,0.007871999715765318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,fp8,0,0.016895999511082966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,fp8,0,0.01725333308180173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,fp8,0,0.016938666502634685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,24,1,128,1,float16,fp8,0,0.016303999970356624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,24,2,128,1,float16,float16,0,0.026127999027570088
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,24,1,128,1,float16,float16,0,0.013647999614477158
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,0,0.022437334060668945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,24,4,128,1,float16,float16,0,0.040074666341145836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,0,0.0341333324710528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,0,0.053642665346463524
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,24,8,128,1,float16,float16,0,0.06488533318042755
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,24,24,128,1,float16,float16,0,0.08108266691366832
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,24,1,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,24,2,128,1,float16,float16,0,0.017621333400408428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,24,1,128,1,float16,float16,0,0.009829333052039146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,0,0.015141333142916361
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,fp8,0,0.07198399802049
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,24,4,128,1,float16,float16,0,0.025018667181332905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,24,24,128,1,float16,float16,0,0.04294399917125702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,24,1,128,1,float16,float16,0,0.009359999870260557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,0,0.021231998999913532
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,24,8,128,1,float16,float16,0,0.03871466716130575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,0,0.03257599969704946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,fp8,0,0.04128533353408178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,24,1,128,1,float16,fp8,0,0.01249066616098086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,24,2,128,1,float16,float16,0,0.01332266628742218
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,0,0.012479999413092932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,24,4,128,1,float16,float16,0,0.016879999389251072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,0,0.014394666999578476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,24,8,128,1,float16,float16,0,0.02404266595840454
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,24,24,128,1,float16,float16,0,0.025226667523384094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,24,1,128,1,float16,float16,0,0.009045333291093508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,24,1,128,1,float16,fp8,0,0.011733333269755045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,0,0.020448000480731327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,24,2,128,1,float16,float16,0,0.012831999609867731
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,0,0.011621333658695221
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,fp8,0,0.026485333840052288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,24,4,128,1,float16,float16,0,0.013045333325862885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,24,8,128,1,float16,float16,0,0.016490666816631954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,0,0.01179733375708262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,24,1,128,1,float16,float16,0,0.008896000062425932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,24,24,128,1,float16,float16,0,0.015018666783968607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,0,0.013855999956528345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,24,1,128,1,float16,fp8,0,0.011525332927703857
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,24,2,128,1,float16,float16,0,0.012682666381200155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,fp8,0,0.020432000358899433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,0,0.011413333316644033
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,24,4,128,1,float16,float16,0,0.012682666381200155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,24,8,128,1,float16,float16,0,0.012789333860079447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,0,0.011503999431928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,24,24,128,1,float16,float16,0,0.011354666203260422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,24,1,128,1,float16,float16,0,0.008896000062425932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,24,1,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,0,0.013621332744757334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,24,4,128,1,float16,float16,0,0.012448000411192576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,24,2,128,1,float16,float16,0,0.012522666404644648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,fp8,0,0.014938666174809137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,0,0.011312000453472137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,24,8,128,1,float16,float16,0,0.012560000022252401
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,0,0.013381333400805792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,24,24,128,1,float16,float16,0,0.007840000092983246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,24,1,128,1,float16,float16,0,0.008725333337982496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,24,2,128,1,float16,float16,0,0.012047999848922094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,0,0.011152000476916632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,24,1,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,24,4,128,1,float16,float16,0,0.01219733307758967
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,24,8,128,1,float16,float16,0,0.012128000458081564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,24,24,128,1,float16,float16,0,0.007936000203092894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,fp8,0,0.014767999450365702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,24,1,128,1,float16,float16,0,0.008709333216150602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,24,2,128,1,float16,float16,0,0.01198400060335795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,24,1,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,fp8,0,0.014069333672523499
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,24,24,128,1,float16,float16,0,0.007711999739209811
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,24,4,128,1,float16,float16,0,0.012047999848922094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,24,8,128,1,float16,float16,0,0.012063999970753988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,24,1,128,1,float16,float16,0,0.008527999743819237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,24,1,128,1,float16,fp8,0,0.010608000059922537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,24,4,128,1,float16,float16,0,0.00874133345981439
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,0,0.010672000547250112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,24,2,128,1,float16,float16,0,0.008645333349704742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,24,8,128,1,float16,float16,0,0.010384000216921171
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,0,0.011600000162919363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,fp8,0,2.701792081197103
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,fp8,0,5.0158131917317705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,fp8,0,16.08008575439453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,16,16,128,1,float16,float16,0,28.152676900227863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,fp8,0,1.3829654057820637
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,16,1,128,1,float16,float16,0,26.303822835286457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,fp8,0,2.7680800755818686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,16,1,128,1,float16,float16,0,52.26265970865885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,16,2,128,1,float16,float16,0,52.395487467447914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,16,4,128,1,float16,float16,0,53.83630879720052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,16,8,128,1,float16,float16,0,53.549092610677086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,16,2,128,1,float16,float16,0,26.647193908691406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,fp8,0,7.700682957967122
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,fp8,0,55.53480021158854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,fp8,0,0.7512426376342773
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,fp8,0,58.26781209309896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,fp8,0,1.3674453099568684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,16,16,128,1,float16,float16,0,13.629375457763672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,16,1,128,1,float16,float16,0,13.075968424479166
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,16,4,128,1,float16,float16,0,27.255284627278645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,fp8,0,3.981306711832682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,16,2,128,1,float16,float16,0,13.873194376627604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,16,8,128,1,float16,float16,0,28.219759623209637
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,16,4,128,1,float16,float16,0,13.822762807210287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,16,16,128,1,float16,float16,0,6.862831751505534
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,fp8,0,31.395301818847656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,fp8,0,0.2603893280029297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,fp8,0,0.8154346942901611
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,16,8,128,1,float16,float16,0,14.116597493489584
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,16,1,128,1,float16,float16,0,6.049989064534505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,fp8,0,15.743568420410156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,fp8,0,2.3551947275797525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,fp8,0,31.02581787109375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,16,2,128,1,float16,float16,0,6.773040135701497
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,16,4,128,1,float16,float16,0,6.175818761189778
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,fp8,0,1.8489920298258464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,16,8,128,1,float16,float16,0,6.37548828125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,fp8,0,3.4588851928710938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,fp8,0,15.063850402832031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,fp8,0,7.327738444010417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,fp8,0,9.140250523885092
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,16,16,128,1,float16,float16,0,16.14726384480794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,fp8,0,1.00383464495341
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,16,1,128,1,float16,float16,0,14.93551508585612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,fp8,0,1.7777600288391113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,16,2,128,1,float16,float16,0,31.752960205078125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,16,1,128,1,float16,float16,0,30.337488810221355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,16,4,128,1,float16,float16,0,29.950037638346355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,16,8,128,1,float16,float16,0,31.188613891601562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,fp8,0,4.59665584564209
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,fp8,0,33.90234120686849
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,16,2,128,1,float16,float16,0,14.92196782430013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,fp8,0,32.73747762044271
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,fp8,0,0.5300053358078003
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,fp8,0,1.0077173709869385
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,16,4,128,1,float16,float16,0,14.970228830973307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,16,16,128,1,float16,float16,0,7.5240052541097
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,16,1,128,1,float16,float16,0,7.2310028076171875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,16,8,128,1,float16,float16,0,15.31603749593099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,16,2,128,1,float16,float16,0,7.249002456665039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,fp8,0,2.4007627169291177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,fp8,0,16.73473612467448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,fp8,0,0.18237332503000894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,16,4,128,1,float16,float16,0,7.220933278401692
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,16,1,128,1,float16,float16,0,3.117482821146647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,fp8,0,0.48414401213328045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,16,16,128,1,float16,float16,0,3.334735870361328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,16,8,128,1,float16,float16,0,7.970336278279622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,fp8,0,1.2470026810963948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,16,2,128,1,float16,float16,0,3.1591199239095054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,fp8,0,16.48962656656901
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,16,4,128,1,float16,float16,0,3.3526665369669595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,fp8,0,8.228997548421225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,fp8,0,1.5218507448832195
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,16,8,128,1,float16,float16,0,3.2165492375691733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,fp8,0,8.074000040690104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,fp8,0,4.219216028849284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,fp8,0,2.5540053049723306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,fp8,0,6.367749532063802
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,16,16,128,1,float16,float16,0,11.698768615722656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,fp8,0,0.8064693609873453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,16,1,128,1,float16,float16,0,20.79372787475586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,16,1,128,1,float16,float16,0,10.8994509379069
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,16,2,128,1,float16,float16,0,22.821604410807293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,fp8,0,1.4289813041687012
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,16,4,128,1,float16,float16,0,22.594757080078125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,16,8,128,1,float16,float16,0,22.97950490315755
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,fp8,0,3.485007921854655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,fp8,0,25.426864624023438
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,fp8,0,23.829366048177082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,16,2,128,1,float16,float16,0,10.331365585327148
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,fp8,0,0.401034673055013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,16,4,128,1,float16,float16,0,10.538453420003256
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,fp8,0,0.6704586346944174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,16,16,128,1,float16,float16,0,5.508341471354167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,16,8,128,1,float16,float16,0,10.637765248616537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,fp8,0,1.5796267191569011
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,16,1,128,1,float16,float16,0,4.914112091064453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,16,2,128,1,float16,float16,0,4.802432060241699
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,16,4,128,1,float16,float16,0,4.979658762613933
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,16,16,128,1,float16,float16,0,2.307802677154541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,fp8,0,12.460187276204428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,fp8,0,0.14382400115331015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,16,8,128,1,float16,float16,0,4.905167897542317
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,fp8,0,0.3518879810969035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,16,1,128,1,float16,float16,0,2.3841919898986816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,fp8,0,12.158271789550781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,16,2,128,1,float16,float16,0,2.2131519317626953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,fp8,0,5.711706797281901
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,16,4,128,1,float16,float16,0,2.231328010559082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,fp8,0,1.0111040274302165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,fp8,0,5.487520217895508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,16,8,128,1,float16,float16,0,2.2272960344950357
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,fp8,0,2.969013214111328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,fp8,0,2.2631093660990396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,fp8,0,3.5374132792154946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,fp8,0,8.893349329630533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,16,16,128,1,float16,float16,0,14.824197133382162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,fp8,0,1.1584053039550781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,16,1,128,1,float16,float16,0,14.120559692382812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,16,2,128,1,float16,float16,0,28.970113118489582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,16,1,128,1,float16,float16,0,30.28711446126302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,16,4,128,1,float16,float16,0,29.515851338704426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,16,8,128,1,float16,float16,0,29.79413350423177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,fp8,0,1.9092532793680828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,16,2,128,1,float16,float16,0,13.779188791910807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,fp8,0,32.4913330078125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,fp8,0,29.487386067708332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,fp8,0,4.556698799133301
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,fp8,0,0.5859040021896362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,16,16,128,1,float16,float16,0,6.995365142822266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,16,1,128,1,float16,float16,0,6.788501103719075
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,16,2,128,1,float16,float16,0,5.833456039428711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,16,4,128,1,float16,float16,0,13.708234151204428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,fp8,0,0.9462133248647054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,fp8,0,2.404703934987386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,16,8,128,1,float16,float16,0,14.37829335530599
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,fp8,0,16.483408610026043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,16,4,128,1,float16,float16,0,6.48741340637207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,fp8,0,0.20918399095535278
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,16,16,128,1,float16,float16,0,3.3205172220865884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,16,1,128,1,float16,float16,0,3.0018345514933267
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,fp8,0,0.4688426653544108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,16,8,128,1,float16,float16,0,7.301765441894531
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,fp8,0,16.152751922607422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,fp8,0,7.777119954427083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,16,2,128,1,float16,float16,0,3.0314133961995444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,fp8,0,1.105456034342448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,fp8,0,7.058330535888672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,fp8,0,0.09097599983215332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,16,1,128,1,float16,float16,0,1.4832053184509277
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,16,4,128,1,float16,float16,0,2.778005282084147
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,16,16,128,1,float16,float16,0,1.6042613983154297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,fp8,0,0.2293226718902588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,16,8,128,1,float16,float16,0,2.806410789489746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,16,2,128,1,float16,float16,0,1.4824105898539226
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,fp8,0,3.713850657145182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,fp8,0,0.6140533288319906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,16,4,128,1,float16,float16,0,1.4717547098795574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,16,8,128,1,float16,float16,0,1.4604479471842449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,fp8,0,3.510218620300293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,fp8,0,1.8527092933654785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,fp8,0,1.6412533124287922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,fp8,0,2.42959992090861
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,fp8,0,5.09335994720459
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,16,16,128,1,float16,float16,0,8.128101348876953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,fp8,0,0.8378667036692301
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,16,1,128,1,float16,float16,0,16.102986653645832
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,16,1,128,1,float16,float16,0,7.787034352620442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,16,2,128,1,float16,float16,0,17.43297576904297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,16,4,128,1,float16,float16,0,16.9027837117513
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,fp8,0,1.3468799591064453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,16,8,128,1,float16,float16,0,17.465717315673828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,fp8,0,18.31979242960612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,16,2,128,1,float16,float16,0,7.7013600667317705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,fp8,0,2.8505919774373374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,fp8,0,17.437642415364582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,fp8,0,0.413541316986084
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,fp8,0,0.6375946601231893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,16,4,128,1,float16,float16,0,8.004490534464518
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,16,16,128,1,float16,float16,0,4.299157460530599
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,16,1,128,1,float16,float16,0,3.4387893676757812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,16,8,128,1,float16,float16,0,8.06374422709147
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,fp8,0,1.4881067276000977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,16,2,128,1,float16,float16,0,3.498666763305664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,fp8,0,8.815818786621094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,16,4,128,1,float16,float16,0,3.36463991800944
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,fp8,0,0.12693333625793457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,16,8,128,1,float16,float16,0,3.780090649922689
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,16,16,128,1,float16,float16,0,2.061247984568278
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,16,1,128,1,float16,float16,0,1.6891093254089355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,fp8,0,0.24945066372553507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,16,2,128,1,float16,float16,0,1.821232000986735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,fp8,0,8.501536051432291
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,fp8,0,0.7778773307800293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,fp8,0,4.729424158732097
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,16,4,128,1,float16,float16,0,1.9712640444437664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,fp8,0,3.932037353515625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,fp8,0,0.08053866525491078
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,16,8,128,1,float16,float16,0,1.6584693590799968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,16,16,128,1,float16,float16,0,0.9599413077036539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,16,1,128,1,float16,float16,0,0.849829355875651
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,fp8,0,0.17866667111714682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,fp8,0,1.991136074066162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,16,2,128,1,float16,float16,0,0.8625173568725586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,16,4,128,1,float16,float16,0,0.9083893299102783
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,fp8,0,0.4158080021540324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,16,8,128,1,float16,float16,0,0.8399679660797119
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,fp8,0,2.1347039540608725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,fp8,0,1.1298027038574219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,fp8,0,2.081098715464274
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,fp8,0,2.869482676188151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,fp8,0,5.374245325724284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,16,1,128,1,float16,float16,0,5.497066497802734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,16,16,128,1,float16,float16,0,7.095232009887695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,16,1,128,1,float16,float16,0,15.436079661051432
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,16,2,128,1,float16,float16,0,16.16478983561198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,16,4,128,1,float16,float16,0,15.643349965413412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,fp8,0,1.065893332163493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,16,8,128,1,float16,float16,0,16.0308100382487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,fp8,0,1.5146560668945312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,fp8,0,16.952064514160156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,fp8,0,2.8718347549438477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,fp8,0,16.103968302408855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,16,2,128,1,float16,float16,0,6.612464269002278
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,16,4,128,1,float16,float16,0,7.355136235555013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,16,16,128,1,float16,float16,0,4.1344960530598955
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,fp8,0,0.5459413528442383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,16,8,128,1,float16,float16,0,7.573392232259114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,16,1,128,1,float16,float16,0,3.1355358759562173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,fp8,0,0.7386613686879476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,fp8,0,7.621418635050456
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,fp8,0,1.6410986582438152
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,16,2,128,1,float16,float16,0,3.3148320515950522
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,16,4,128,1,float16,float16,0,2.935818672180176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,fp8,0,0.12517866492271423
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,fp8,0,7.942842483520508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,16,8,128,1,float16,float16,0,3.241845448811849
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,16,16,128,1,float16,float16,0,2.0518080393473306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,16,1,128,1,float16,float16,0,1.749717394510905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,fp8,0,3.762282689412435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,fp8,0,0.3901653289794922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,fp8,0,0.7376213073730469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,16,4,128,1,float16,float16,0,1.6605440775553386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,16,2,128,1,float16,float16,0,1.4640533129374187
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,fp8,0,3.8253440856933594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,16,16,128,1,float16,float16,0,0.9478879769643148
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,16,8,128,1,float16,float16,0,1.6354187329610188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,fp8,0,0.05958400170008341
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,16,1,128,1,float16,float16,0,0.7823893229166666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,fp8,0,1.9070453643798828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,fp8,0,0.13425599535306296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,fp8,0,0.4192853371302287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,16,2,128,1,float16,float16,0,0.7962666352589926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,16,8,128,1,float16,float16,0,0.8512586752573649
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,fp8,0,1.7424960136413574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,16,4,128,1,float16,float16,0,0.7817493279774984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,16,1,128,1,float16,float16,0,0.41647998491923016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,16,16,128,1,float16,float16,0,0.434714674949646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,fp8,0,1.0776906808217366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,fp8,0,0.049626668294270836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,16,2,128,1,float16,float16,0,0.43439467748006183
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,fp8,0,0.08869866530100505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,16,4,128,1,float16,float16,0,0.4118400017420451
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,fp8,0,0.2258239984512329
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,fp8,0,0.9051093260447184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,16,8,128,1,float16,float16,0,0.42579201857248944
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,fp8,0,0.6264106829961141
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,fp8,0,2.0314613978068032
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,fp8,0,1.5135146776835124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,fp8,0,3.7128852208455405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,16,2,128,1,float16,float16,0,8.806533177693685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,16,16,128,1,float16,float16,0,4.493071873982747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,16,4,128,1,float16,float16,0,8.79917844136556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,16,1,128,1,float16,float16,0,7.9873708089192705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,fp8,0,0.7710666656494141
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,16,8,128,1,float16,float16,0,8.561567942301432
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,fp8,0,1.083290656407674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,16,1,128,1,float16,float16,0,3.252448081970215
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,fp8,0,10.210949579874674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,fp8,0,1.7826933860778809
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,fp8,0,9.030160268147787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,16,2,128,1,float16,float16,0,3.5385332107543945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,fp8,0,0.37191998958587646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,16,1,128,1,float16,float16,0,1.6148212750752766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,16,4,128,1,float16,float16,0,3.86356258392334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,16,8,128,1,float16,float16,0,4.0602827072143555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,fp8,0,0.5098133484522501
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,16,16,128,1,float16,float16,0,2.2325919469197593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,16,2,128,1,float16,float16,0,1.806063969930013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,fp8,0,4.858442624409993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,fp8,0,4.446890513102214
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,fp8,0,0.9011946519215902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,16,4,128,1,float16,float16,0,1.8168853123982747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,fp8,0,0.08945066730181377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,16,16,128,1,float16,float16,0,1.1365919907887776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,16,8,128,1,float16,float16,0,2.012656052907308
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,16,1,128,1,float16,float16,0,0.9778347015380859
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,16,2,128,1,float16,float16,0,0.9003199736277262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,fp8,0,2.218506654103597
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,fp8,0,0.1519200007120768
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,fp8,0,0.44323734442392987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,16,4,128,1,float16,float16,0,1.0051360130310059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,16,16,128,1,float16,float16,0,0.5721280177434286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,16,8,128,1,float16,float16,0,1.0821386973063152
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,16,1,128,1,float16,float16,0,0.43797866503397626
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,fp8,0,0.0461706668138504
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,fp8,0,1.2159519990285237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,fp8,0,2.072378635406494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,16,2,128,1,float16,float16,0,0.46299731731414795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,fp8,0,0.1088213324546814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,fp8,0,0.22406399250030518
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,16,4,128,1,float16,float16,0,0.49593067169189453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,fp8,0,1.0562187035878499
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,16,8,128,1,float16,float16,0,0.47162667910257977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,fp8,0,0.06332266827424367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,fp8,0,0.6328213214874268
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,fp8,0,0.03994133323431015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,16,16,128,1,float16,float16,0,0.26928534110387164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,16,2,128,1,float16,float16,0,0.25485867261886597
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,16,1,128,1,float16,float16,0,0.26553066571553546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,fp8,0,0.5703519980112711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,16,8,128,1,float16,float16,0,0.26808534065882367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,fp8,0,0.16453333695729574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,16,4,128,1,float16,float16,0,0.2654239932696025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,fp8,0,0.4017866849899292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,fp8,0,1.9542773564656575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,fp8,0,2.4951039950052896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,fp8,0,4.221295992533366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,16,4,128,1,float16,float16,0,8.228650410970053
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,16,8,128,1,float16,float16,0,8.261882781982422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,16,1,128,1,float16,float16,0,7.676687876383464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,16,2,128,1,float16,float16,0,8.107957204182943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,fp8,0,0.9914399782816569
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,fp8,0,1.2552586396535237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,16,1,128,1,float16,float16,0,3.090394655863444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,16,16,128,1,float16,float16,0,4.907727877298991
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,16,2,128,1,float16,float16,0,3.520064036051432
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,16,4,128,1,float16,float16,0,3.4130398432413735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,fp8,0,8.95201047261556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,fp8,0,2.0444000562032065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,fp8,0,9.414762496948242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,16,8,128,1,float16,float16,0,3.8698720932006836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,16,16,128,1,float16,float16,0,2.3158079783121743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,fp8,0,0.46674664815266925
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,fp8,0,4.594938596089681
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,fp8,0,0.6294613281885783
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,16,1,128,1,float16,float16,0,1.5370292663574219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,fp8,0,1.0391093095143635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,16,2,128,1,float16,float16,0,1.604207992553711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,16,4,128,1,float16,float16,0,1.8376372655232747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,16,8,128,1,float16,float16,0,2.01254940032959
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,16,1,128,1,float16,float16,0,0.8172372976938883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,fp8,0,2.2703679402669272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,16,2,128,1,float16,float16,0,0.8931039969126383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,fp8,0,0.25758399566014606
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,fp8,0,0.11153599619865417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,fp8,0,1.9916693369547527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,16,16,128,1,float16,float16,0,1.1893386840820312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,fp8,0,4.182570775349935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,fp8,0,0.4719359874725342
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,16,4,128,1,float16,float16,0,0.9040213425954183
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,16,8,128,1,float16,float16,0,0.9553386370340983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,16,1,128,1,float16,float16,0,0.40699732303619385
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,16,16,128,1,float16,float16,0,0.5953866640726725
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,fp8,0,1.0102986494700115
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,16,2,128,1,float16,float16,0,0.41547731558481854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,fp8,0,1.1158719857533772
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,16,4,128,1,float16,float16,0,0.4468959967295329
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,fp8,0,0.0796800007422765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,16,16,128,1,float16,float16,0,0.25844266017278034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,fp8,0,0.047925333182017006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,16,8,128,1,float16,float16,0,0.4872959852218628
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,fp8,0,0.21144533157348633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,16,1,128,1,float16,float16,0,0.22731733322143555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,fp8,0,0.03230399886767069
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,fp8,0,0.581765333811442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,fp8,0,0.49021867911020917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,16,2,128,1,float16,float16,0,0.23057067394256592
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,fp8,0,0.05336533486843109
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,16,8,128,1,float16,float16,0,0.24067733685175577
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,16,16,128,1,float16,float16,0,0.1439520021279653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,16,4,128,1,float16,float16,0,0.2437493403752645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,fp8,0,0.12016000350316365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,16,1,128,1,float16,float16,0,0.12965333461761475
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,fp8,0,0.3141706585884094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,16,2,128,1,float16,float16,0,0.13240533073743185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,16,4,128,1,float16,float16,0,0.1355839967727661
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,fp8,0,0.31435734033584595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,fp8,0,0.04543999830881754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,fp8,0,0.028917332490285236
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,16,8,128,1,float16,float16,0,0.13640000422795615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,fp8,0,0.07658133407433827
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,fp8,0,0.18642133474349976
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,fp8,0,1.4633760452270508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,fp8,0,2.7852160135904946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,fp8,0,1.7885600725809734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,16,1,128,1,float16,float16,0,4.115056037902832
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,16,4,128,1,float16,float16,0,4.277749379475911
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,16,8,128,1,float16,float16,0,4.832117398579915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,fp8,0,5.7160905202229815
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,16,2,128,1,float16,float16,0,4.126351992289226
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,fp8,0,0.7258986632029215
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,16,16,128,1,float16,float16,0,3.103391965230306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,16,2,128,1,float16,float16,0,1.990869363149007
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,16,1,128,1,float16,float16,0,1.8614826202392578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,16,4,128,1,float16,float16,0,2.0832640329996743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,fp8,0,0.9152853488922119
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,fp8,0,5.138293266296387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,fp8,0,1.3724479675292969
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,16,8,128,1,float16,float16,0,2.4580373764038086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,16,1,128,1,float16,float16,0,0.9388853708902994
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,16,16,128,1,float16,float16,0,1.520250638326009
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,fp8,0,3.0301974614461265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,fp8,0,0.41866668065388996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,16,2,128,1,float16,float16,0,1.0279946327209473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,16,4,128,1,float16,float16,0,1.1073919932047527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,fp8,0,2.5416852633158364
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,fp8,0,0.6463359991709391
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,fp8,0,0.3376106818517049
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,16,16,128,1,float16,float16,0,0.742965300877889
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,16,1,128,1,float16,float16,0,0.5042346715927124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,fp8,0,0.05982399980227152
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,16,8,128,1,float16,float16,0,1.232261339823405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,fp8,0,1.4399360020955403
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,16,2,128,1,float16,float16,0,0.524618665377299
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,fp8,0,1.2154346307118733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,16,4,128,1,float16,float16,0,0.5473653475443522
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,fp8,0,0.12423466642697652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,16,16,128,1,float16,float16,0,0.37700267632802326
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,fp8,0,0.3083466688791911
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,16,8,128,1,float16,float16,0,0.6213653484980265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,fp8,0,0.683679978052775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,16,1,128,1,float16,float16,0,0.2476266622543335
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,16,2,128,1,float16,float16,0,0.25679999589920044
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,fp8,0,0.03443733354409536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,fp8,0,0.6243893305460612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,fp8,0,0.14815466602643332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,fp8,0,0.0646666685740153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,16,8,128,1,float16,float16,0,0.2869759996732076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,16,4,128,1,float16,float16,0,0.2723733385403951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,16,1,128,1,float16,float16,0,0.1441333293914795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,fp8,0,0.3264533281326294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,16,16,128,1,float16,float16,0,0.15664000312487283
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,16,2,128,1,float16,float16,0,0.14884266257286072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,fp8,0,0.027136000494162243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,fp8,0,0.03990400085846583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,fp8,0,0.3201120098431905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,16,4,128,1,float16,float16,0,0.15467733144760132
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,fp8,0,0.0937546690305074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,fp8,0,0.2057173252105713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,16,1,128,1,float16,float16,0,0.08902399738629659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,16,8,128,1,float16,float16,0,0.1572160025437673
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,fp8,0,0.023152001202106476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,16,16,128,1,float16,float16,0,0.0927946666876475
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,16,2,128,1,float16,float16,0,0.0897866686185201
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,16,4,128,1,float16,float16,0,0.09220799803733826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,fp8,0,0.03456533451875051
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,16,8,128,1,float16,float16,0,0.09156800309816997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,fp8,0,0.1980853279431661
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,fp8,0,0.05949866771697998
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,fp8,0,0.15124266346295676
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,fp8,0,1.9582667350769043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,16,1,128,1,float16,float16,0,3.663029352823893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,fp8,0,2.341872056325277
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,16,8,128,1,float16,float16,0,5.118789354960124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,16,2,128,1,float16,float16,0,4.132383982340495
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,16,4,128,1,float16,float16,0,4.491082509358724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,16,1,128,1,float16,float16,0,1.7740373611450195
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,fp8,0,0.966650644938151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,16,16,128,1,float16,float16,0,3.3664426803588867
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,fp8,0,3.2978026072184243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,16,2,128,1,float16,float16,0,1.9167200724283855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,fp8,0,6.342671712239583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,fp8,0,5.548394521077474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,16,8,128,1,float16,float16,0,2.5891626675923667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,fp8,0,1.6820106506347656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,16,4,128,1,float16,float16,0,2.131610711415609
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,16,16,128,1,float16,float16,0,1.7336212793986003
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,16,1,128,1,float16,float16,0,0.9058453241984049
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,fp8,0,1.1606026490529378
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,fp8,0,2.747338612874349
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,fp8,0,0.4941973288853963
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,fp8,0,0.5345973173777262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,fp8,0,3.245487848917643
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,16,16,128,1,float16,float16,0,0.8515946865081787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,fp8,0,1.5858346621195476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,fp8,0,0.8002986907958984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,16,1,128,1,float16,float16,0,0.4760799805323283
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,16,4,128,1,float16,float16,0,1.1457066535949707
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,16,2,128,1,float16,float16,0,0.9838079611460367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,16,8,128,1,float16,float16,0,1.297381321589152
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,fp8,0,0.10036266843477885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,16,2,128,1,float16,float16,0,0.49938666820526123
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,fp8,0,0.2087093393007914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,fp8,0,1.3260587056477864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,fp8,0,0.35120534896850586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,16,4,128,1,float16,float16,0,0.5533119837443033
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,fp8,0,0.7342987060546875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,16,16,128,1,float16,float16,0,0.43297600746154785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,16,1,128,1,float16,float16,0,0.22961066166559854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,16,8,128,1,float16,float16,0,0.6210560003916422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,fp8,0,0.647765318552653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,fp8,0,0.03669333209594091
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,fp8,0,0.060362666845321655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,16,2,128,1,float16,float16,0,0.2447999914487203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,16,4,128,1,float16,float16,0,0.2899199922879537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,16,16,128,1,float16,float16,0,0.16496533155441284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,16,8,128,1,float16,float16,0,0.31360532840092975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,fp8,0,0.36453866958618164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,fp8,0,0.14661866426467896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,16,1,128,1,float16,float16,0,0.12378133336702983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,fp8,0,0.025834667185942333
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,fp8,0,0.03690666705369949
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,16,4,128,1,float16,float16,0,0.1418186624844869
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,fp8,0,0.2900320092837016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,16,16,128,1,float16,float16,0,0.08565333485603333
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,16,2,128,1,float16,float16,0,0.13613333304723105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,fp8,0,0.07924266656239827
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,fp8,0,0.18597332636515299
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,16,1,128,1,float16,float16,0,0.07914666831493378
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,16,8,128,1,float16,float16,0,0.14644799629847208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,16,2,128,1,float16,float16,0,0.08224533498287201
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,fp8,0,0.18370666106541952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,fp8,0,0.020901332298914593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,fp8,0,0.029504001140594482
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,16,4,128,1,float16,float16,0,0.0846720039844513
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,fp8,0,0.0495306650797526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,16,8,128,1,float16,float16,0,0.08713600039482117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,16,16,128,1,float16,float16,0,0.04669866462548574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,fp8,0,0.11585600177447002
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,fp8,0,0.047322665651639305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,16,2,128,1,float16,float16,0,0.04461866617202759
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,16,1,128,1,float16,float16,0,0.0432533323764801
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,16,4,128,1,float16,float16,0,0.04613333443800608
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,fp8,0,0.05560533205668131
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,16,8,128,1,float16,float16,0,0.04646400113900503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,fp8,0,0.07240533332029979
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,fp8,0,0.12441066900889079
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,fp8,0,0.09131200114885966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,fp8,0,1.9562613169352214
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,16,1,128,1,float16,float16,0,3.2000853220621743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,16,2,128,1,float16,float16,0,3.3701388041178384
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,fp8,0,2.343616008758545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,16,4,128,1,float16,float16,0,3.904213269551595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,fp8,0,3.154282569885254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,16,1,128,1,float16,float16,0,1.142741362253825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,16,16,128,1,float16,float16,0,3.2048266728719077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,fp8,0,1.003391981124878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,fp8,0,4.142416000366211
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,16,8,128,1,float16,float16,0,4.751562754313151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,16,4,128,1,float16,float16,0,1.5972213745117188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,fp8,0,1.144485314687093
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,fp8,0,1.6736586888631184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,fp8,0,4.868933359781901
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,16,2,128,1,float16,float16,0,1.2850613594055176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,16,1,128,1,float16,float16,0,0.5766133467356364
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,fp8,0,0.5273439884185791
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,fp8,0,0.4572906494140625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,fp8,0,2.0821760495503745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,fp8,0,0.7178719838460287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,fp8,0,2.4684693018595376
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,16,8,128,1,float16,float16,0,2.1260053316752114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,16,4,128,1,float16,float16,0,0.8001226584116617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,fp8,0,0.07436800003051758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,16,16,128,1,float16,float16,0,1.6095040639241536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,16,16,128,1,float16,float16,0,0.8075199921925863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,16,8,128,1,float16,float16,0,1.0231893062591553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,16,2,128,1,float16,float16,0,0.3307146628697713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,16,2,128,1,float16,float16,0,0.6436266501744589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,16,4,128,1,float16,float16,0,0.3994133472442627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,fp8,0,1.2753653526306152
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,fp8,0,0.18557333946228027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,16,1,128,1,float16,float16,0,0.14194132884343466
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,16,16,128,1,float16,float16,0,0.36425598462422687
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,16,8,128,1,float16,float16,0,0.5149173339207967
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,fp8,0,0.5280319849650065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,fp8,0,0.032986665765444435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,fp8,0,0.2929600079854329
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,16,1,128,1,float16,float16,0,0.3051626682281494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,16,2,128,1,float16,float16,0,0.15617600083351135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,fp8,0,0.9694026311238607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,16,8,128,1,float16,float16,0,0.22489599386850992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,fp8,0,0.04966400067011515
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,fp8,0,0.11184533437093098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,16,4,128,1,float16,float16,0,0.19449067115783691
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,fp8,0,0.022645334402720135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,fp8,0,0.4517546494801839
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,fp8,0,0.19225066900253296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,16,2,128,1,float16,float16,0,0.08326933284600575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,fp8,0,0.030378667016824085
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,16,16,128,1,float16,float16,0,0.10918933153152466
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,fp8,0,0.24665600061416626
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,16,4,128,1,float16,float16,0,0.09186666210492452
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,16,1,128,1,float16,float16,0,0.07976000010967255
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,fp8,0,0.05658133327960968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,16,16,128,1,float16,float16,0,0.05186666548252106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,fp8,0,0.01757866640885671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,16,1,128,1,float16,float16,0,0.04378666480382284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,fp8,0,0.12396799524625142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,16,2,128,1,float16,float16,0,0.04614399870236715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,16,8,128,1,float16,float16,0,0.09274666508038838
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,fp8,0,0.11793599526087443
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,16,4,128,1,float16,float16,0,0.050442665815353394
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,fp8,0,0.022543999056021374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,fp8,0,0.03631466627120972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,16,8,128,1,float16,float16,0,0.05128000179926554
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,fp8,0,0.04466133316357931
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,16,1,128,1,float16,float16,0,0.02741333345572154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,16,16,128,1,float16,float16,0,0.030986666679382324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,fp8,0,0.0639519989490509
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,fp8,0,0.04903466502825419
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,16,4,128,1,float16,float16,0,0.03012799968322118
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,fp8,0,0.09173867106437683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,16,2,128,1,float16,float16,0,0.028037334481875103
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,16,8,128,1,float16,float16,0,0.030229332546393078
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,16,16,128,1,float16,float16,0,0.018863999595244724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,fp8,0,0.058917333682378135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,fp8,0,0.029504001140594482
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,16,1,128,1,float16,float16,0,0.01721599946419398
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,16,2,128,1,float16,float16,0,0.01721599946419398
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,fp8,0,0.06940799951553345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,fp8,0,0.05054399867852529
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,16,4,128,1,float16,float16,0,0.01844800015290578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,fp8,0,0.032629333436489105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,fp8,0,0.03454400102297465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,16,8,128,1,float16,float16,0,0.01850133389234543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,fp8,0,0.04065066576004028
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,fp8,0,0.9769759972890218
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,fp8,0,1.144800027211507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,16,2,128,1,float16,float16,0,1.209882656733195
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,16,4,128,1,float16,float16,0,1.5536905924479167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,16,1,128,1,float16,float16,0,0.9682559967041016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,fp8,0,1.586282730102539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,16,16,128,1,float16,float16,0,1.617184003194173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,fp8,0,1.6758507092793782
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,16,8,128,1,float16,float16,0,2.1188534100850425
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,16,1,128,1,float16,float16,0,0.43507734934488934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,16,2,128,1,float16,float16,0,0.5227253437042236
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,fp8,0,2.4756372769673667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,fp8,0,0.4553440014521281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,fp8,0,0.5311679840087891
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,16,4,128,1,float16,float16,0,0.7013759613037109
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,16,16,128,1,float16,float16,0,0.8074080149332682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,16,1,128,1,float16,float16,0,0.22147732973098755
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,fp8,0,1.1855680147806804
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,fp8,0,0.7264106273651123
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,fp8,0,0.0734559992949168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,16,2,128,1,float16,float16,0,0.2518346707026164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,16,4,128,1,float16,float16,0,0.3290826678276062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,16,8,128,1,float16,float16,0,1.0286346276601155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,fp8,0,0.21795199314753214
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,fp8,0,0.34994665781656903
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,16,16,128,1,float16,float16,0,0.36686400572458905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,16,8,128,1,float16,float16,0,0.49217065175374347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,fp8,0,0.4615413347880046
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,fp8,0,0.28244266907374066
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,fp8,0,0.7635626792907715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,fp8,0,0.03295466552178065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,16,2,128,1,float16,float16,0,0.11111467083295186
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,fp8,0,0.04458666841189066
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,16,16,128,1,float16,float16,0,0.07925333579381307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,16,8,128,1,float16,float16,0,0.1836586594581604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,fp8,0,0.1312266687552134
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,16,1,128,1,float16,float16,0,0.05535466472307841
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,fp8,0,0.0909546713034312
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,16,4,128,1,float16,float16,0,0.14179733395576477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,fp8,0,0.022778667509555817
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,16,4,128,1,float16,float16,0,0.06739733119805653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,16,2,128,1,float16,float16,0,0.06002133091290792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,fp8,0,0.027424000203609467
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,fp8,0,0.04667733112970988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,16,8,128,1,float16,float16,0,0.06756799916426341
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,16,16,128,1,float16,float16,0,0.03842666745185852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,16,1,128,1,float16,float16,0,0.10231999556223552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,16,1,128,1,float16,float16,0,0.03161066770553589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,fp8,0,0.18836800257364908
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,fp8,0,0.017509333789348602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,fp8,0,0.08226666847864787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,fp8,0,0.019413333386182785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,fp8,0,0.08989866574605306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,16,4,128,1,float16,float16,0,0.03734933336575826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,fp8,0,0.029663999875386555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,16,2,128,1,float16,float16,0,0.033402666449546814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,16,16,128,1,float16,float16,0,0.02314666658639908
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,fp8,0,0.043375998735427856
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,16,8,128,1,float16,float16,0,0.037621334195137024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,16,2,128,1,float16,float16,0,0.0204373337328434
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,fp8,0,0.07214400172233582
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,fp8,0,0.04452266792456309
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,16,4,128,1,float16,float16,0,0.02250133454799652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,fp8,0,0.052111998200416565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,16,8,128,1,float16,float16,0,0.022629333039124806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,fp8,0,0.04586666822433472
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,16,16,128,1,float16,float16,0,0.013909333695967993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,16,1,128,1,float16,float16,0,0.01246400053302447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,fp8,0,0.05857066810131073
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,16,1,128,1,float16,float16,0,0.019509332875410717
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,16,2,128,1,float16,float16,0,0.012432000289360682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,fp8,0,0.029391999046007793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,16,4,128,1,float16,float16,0,0.013376000026861826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,fp8,0,0.029285334050655365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,16,8,128,1,float16,float16,0,0.013461332768201828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,fp8,0,0.03429333368937174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,fp8,0,0.04067199925581614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,16,1,128,1,float16,float16,0,0.011535999675591787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,fp8,0,0.022042666872342426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,16,2,128,1,float16,float16,0,0.011509332805871964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,16,16,128,1,float16,float16,0,0.01210133358836174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,fp8,0,0.0220320001244545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,16,4,128,1,float16,float16,0,0.011813333878914515
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,fp8,0,0.026288000245889027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,fp8,0,0.03517866631348928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,16,8,128,1,float16,float16,0,0.01180800050497055
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,fp8,0,0.025114665428797405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,fp8,0,0.025536000728607178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,16,1,128,1,float16,float16,0,0.4677013158798218
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,fp8,0,0.4614186684290568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,fp8,0,0.5281013250350952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,16,2,128,1,float16,float16,0,0.5675199826558431
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,fp8,0,0.724554697672526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,16,4,128,1,float16,float16,0,0.7057440280914307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,16,16,128,1,float16,float16,0,0.8117333253224691
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,16,8,128,1,float16,float16,0,1.0496479670206706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,16,1,128,1,float16,float16,0,0.21704000234603882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,fp8,0,1.1950026353200276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,fp8,0,0.6906879742940267
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,fp8,0,0.07741333544254303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,fp8,0,0.2779360016187032
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,fp8,0,0.20310932397842407
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,fp8,0,0.29554667075475055
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,16,16,128,1,float16,float16,0,0.36300798257191974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,16,2,128,1,float16,float16,0,0.2416213353474935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,fp8,0,0.451200008392334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,16,8,128,1,float16,float16,0,0.5040266513824463
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,16,1,128,1,float16,float16,0,0.0801333338022232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,16,4,128,1,float16,float16,0,0.32836800813674927
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,fp8,0,0.03276800115903219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,fp8,0,0.04451199869314829
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,16,8,128,1,float16,float16,0,0.17521599928538004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,fp8,0,0.1701493263244629
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,fp8,0,0.08115733166535695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,16,16,128,1,float16,float16,0,0.06781333188215892
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,16,2,128,1,float16,float16,0,0.08957333366076152
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,16,1,128,1,float16,float16,0,0.04401599864164988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,fp8,0,0.022661333282788593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,fp8,0,0.10258666674296062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,16,2,128,1,float16,float16,0,0.04744533201058706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,fp8,0,0.02738133321205775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,16,4,128,1,float16,float16,0,0.11346667011578877
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,fp8,0,0.043663998444875084
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,16,16,128,1,float16,float16,0,0.03235200047492981
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,16,8,128,1,float16,float16,0,0.055770665407180786
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,16,4,128,1,float16,float16,0,0.055104002356529236
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,fp8,0,0.0176959993938605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,fp8,0,0.05402666827042898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,fp8,0,0.06367466847101848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,16,2,128,1,float16,float16,0,0.027189334233601887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,fp8,0,0.019424000134070713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,16,4,128,1,float16,float16,0,0.03129599988460541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,fp8,0,0.026869334280490875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,16,8,128,1,float16,float16,0,0.0315786674618721
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,16,16,128,1,float16,float16,0,0.01899733394384384
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,16,1,128,1,float16,float16,0,0.02571733295917511
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,fp8,0,0.03612799942493439
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,16,1,128,1,float16,float16,0,0.015322666615247726
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,fp8,0,0.03052799900372823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,fp8,0,0.014864000181357065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,16,4,128,1,float16,float16,0,0.018330667167901993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,fp8,0,0.015824000040690105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,16,2,128,1,float16,float16,0,0.01618133361140887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,fp8,0,0.018858666221300762
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,16,8,128,1,float16,float16,0,0.018325333793958027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,16,16,128,1,float16,float16,0,0.011557333171367645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,fp8,0,0.022730665902296703
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,16,1,128,1,float16,float16,0,0.009946666657924652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,fp8,0,0.013658666362365087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,16,2,128,1,float16,float16,0,0.009898666913310686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,fp8,0,0.015114666273196539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,16,4,128,1,float16,float16,0,0.011061333119869232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,16,8,128,1,float16,float16,0,0.011077333241701126
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,fp8,0,0.019850666324297588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,fp8,0,0.01368533323208491
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,16,16,128,1,float16,float16,0,0.00943999985853831
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,fp8,0,0.01870399961868922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,16,1,128,1,float16,float16,0,0.009226666763424873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,16,2,128,1,float16,float16,0,0.009194666519761086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,16,8,128,1,float16,float16,0,0.00926399976015091
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,16,4,128,1,float16,float16,0,0.009194666519761086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,fp8,0,0.013338666409254074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,16,16,128,1,float16,float16,0,0.008799999952316284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,16,1,128,1,float16,float16,0,0.008570666735370954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,fp8,0,0.01669866715868314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,16,2,128,1,float16,float16,0,0.008565333361426989
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,fp8,0,0.012602667013804117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,fp8,0,0.01741333305835724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,fp8,0,0.016421332955360413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,16,8,128,1,float16,float16,0,0.00878399983048439
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,16,4,128,1,float16,float16,0,0.008816000074148178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,fp8,0,0.016309333344300587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,16,1,128,1,float16,float16,0,0.2164106567700704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,fp8,0,0.07895466685295105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,16,4,128,1,float16,float16,0,0.3283360004425049
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,fp8,0,0.21787200371424356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,fp8,0,0.2797279953956604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,16,2,128,1,float16,float16,0,0.25982399781545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,16,8,128,1,float16,float16,0,0.5008746782938639
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,fp8,0,0.4542880058288574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,16,16,128,1,float16,float16,0,0.36474132537841797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,fp8,0,0.26052266359329224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,16,1,128,1,float16,float16,0,0.07481066882610321
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,fp8,0,0.03271466741959254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,fp8,0,0.0444213350613912
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,16,2,128,1,float16,float16,0,0.08262933293978374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,16,8,128,1,float16,float16,0,0.1729546586672465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,16,16,128,1,float16,float16,0,0.06984533369541168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,16,4,128,1,float16,float16,0,0.10824533303578694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,fp8,0,0.08130666613578796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,fp8,0,0.1641493340333303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,16,1,128,1,float16,float16,0,0.04159999887148539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,fp8,0,0.02254933367172877
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,fp8,0,0.07713599999745686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,fp8,0,0.027290667096773785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,16,4,128,1,float16,float16,0,0.05300800005594889
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,16,8,128,1,float16,float16,0,0.05353599786758423
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,fp8,0,0.04329599936803182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,fp8,0,0.055455997586250305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,16,16,128,1,float16,float16,0,0.03049066662788391
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,16,2,128,1,float16,float16,0,0.04555733501911163
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,16,1,128,1,float16,float16,0,0.023685333629449207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,fp8,0,0.04179200033346812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,16,2,128,1,float16,float16,0,0.025445332129796345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,16,8,128,1,float16,float16,0,0.029504001140594482
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,16,16,128,1,float16,float16,0,0.017727999637524288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,fp8,0,0.01754666616519292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,fp8,0,0.01934933289885521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,fp8,0,0.030896000564098358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,fp8,0,0.026698666314284008
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,16,4,128,1,float16,float16,0,0.02916266769170761
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,16,1,128,1,float16,float16,0,0.014309333016475042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,fp8,0,0.014826666563749313
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,fp8,0,0.015706667055686314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,fp8,0,0.02386133372783661
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,16,2,128,1,float16,float16,0,0.01509333277742068
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,16,4,128,1,float16,float16,0,0.017152000218629837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,16,8,128,1,float16,float16,0,0.017221332838137943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,fp8,0,0.018853332847356796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,16,16,128,1,float16,float16,0,0.01110400011142095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,16,1,128,1,float16,float16,0,0.009626666704813639
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,16,2,128,1,float16,float16,0,0.009637333452701569
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,fp8,0,0.013541333377361298
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,fp8,0,0.020202666521072388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,fp8,0,0.016330666840076447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,16,4,128,1,float16,float16,0,0.010586666564146677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,fp8,0,0.013807999591032663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,16,8,128,1,float16,float16,0,0.010741333166758219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,fp8,0,0.014933332800865173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,16,1,128,1,float16,float16,0,0.008789333204428354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,fp8,0,0.015594666202863058
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,16,2,128,1,float16,float16,0,0.008837333569924036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,16,16,128,1,float16,float16,0,0.009317333499590555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,16,4,128,1,float16,float16,0,0.008986666798591614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,fp8,0,0.013461332768201828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,16,8,128,1,float16,float16,0,0.009162666896979014
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,fp8,0,0.013248000293970108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,16,1,128,1,float16,float16,0,0.008378666515151659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,16,16,128,1,float16,float16,0,0.008581333483258883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,fp8,0,0.012752000242471695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,16,2,128,1,float16,float16,0,0.00850133349498113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,16,4,128,1,float16,float16,0,0.008517333616813024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,fp8,0,0.012613333761692047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,fp8,0,0.01268799975514412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,16,16,128,1,float16,float16,0,0.008496000121037165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,16,8,128,1,float16,float16,0,0.00855466661353906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,16,1,128,1,float16,float16,0,0.008282666405042013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,16,2,128,1,float16,float16,0,0.008421333506703377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,fp8,0,0.012181332955757776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,16,4,128,1,float16,float16,0,0.008400000010927519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,fp8,0,0.012538666526476542
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,fp8,0,0.012527999778588613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,16,8,128,1,float16,float16,0,0.008522666369875273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,16,1,128,1,float16,float16,0,0.12531733512878418
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,fp8,0,0.0382080003619194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,fp8,0,0.06028266747792562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,16,2,128,1,float16,float16,0,0.13390933473904928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,fp8,0,0.11317867040634155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,16,4,128,1,float16,float16,0,0.15954132874806723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,16,16,128,1,float16,float16,0,0.0981066624323527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,16,8,128,1,float16,float16,0,0.20990933974583945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,16,1,128,1,float16,float16,0,0.06638399759928386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,fp8,0,0.026682667434215546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,fp8,0,0.13315199812253317
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,16,2,128,1,float16,float16,0,0.06980800131956737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,16,4,128,1,float16,float16,0,0.07750933369000752
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,fp8,0,0.032218667368094124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,16,8,128,1,float16,float16,0,0.07840533554553986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,fp8,0,0.07041599849859874
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,fp8,0,0.08293333152929942
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,16,16,128,1,float16,float16,0,0.04394666850566864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,16,1,128,1,float16,float16,0,0.037205333511034645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,fp8,0,0.21147199471791586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,fp8,0,0.058549334605534874
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,fp8,0,0.02123733361562093
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,16,2,128,1,float16,float16,0,0.0390079990029335
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,fp8,0,0.023455999791622162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,16,4,128,1,float16,float16,0,0.042730664213498436
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,16,8,128,1,float16,float16,0,0.04309333364168803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,16,1,128,1,float16,float16,0,0.02086399992307027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,16,16,128,1,float16,float16,0,0.024298667907714844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,fp8,0,0.03166933357715607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,fp8,0,0.01828266680240631
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,16,2,128,1,float16,float16,0,0.02149333308140437
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,fp8,0,0.04589866598447164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,fp8,0,0.038719999293486275
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,16,4,128,1,float16,float16,0,0.023711999257405598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,fp8,0,0.01952533299724261
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,16,8,128,1,float16,float16,0,0.023647998770078022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,fp8,0,0.022874665757020313
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,16,1,128,1,float16,float16,0,0.012885333349307379
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,fp8,0,0.02496533344189326
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,fp8,0,0.018191999445358913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,16,16,128,1,float16,float16,0,0.014501333236694336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,16,2,128,1,float16,float16,0,0.012858666479587555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,16,4,128,1,float16,float16,0,0.013909333695967993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,16,8,128,1,float16,float16,0,0.014042666802803675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,fp8,0,0.01877333347996076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,fp8,0,0.021712000171343487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,16,1,128,1,float16,float16,0,0.008687999720374743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,fp8,0,0.0195573332409064
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,16,16,128,1,float16,float16,0,0.009237333511312803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,16,2,128,1,float16,float16,0,0.00884799969693025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,fp8,0,0.017456000049908955
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,16,4,128,1,float16,float16,0,0.00897066667675972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,16,16,128,1,float16,float16,0,0.008432000254591307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,fp8,0,0.016842667013406754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,fp8,0,0.017504000415404636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,16,8,128,1,float16,float16,0,0.00902399979531765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,fp8,0,0.017717332889636356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,16,1,128,1,float16,float16,0,0.008298666526873907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,fp8,0,0.017103999853134155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,16,2,128,1,float16,float16,0,0.008250666782259941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,fp8,0,0.017194667210181553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,16,4,128,1,float16,float16,0,0.008373333141207695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,fp8,0,0.01676799977819125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,16,8,128,1,float16,float16,0,0.008367999767263731
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,16,1,128,1,float16,float16,0,0.0080960001796484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,fp8,0,0.016597333053747814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,16,2,128,1,float16,float16,0,0.008037333066264788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,fp8,0,0.016506666938463848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,16,16,128,1,float16,float16,0,0.008218666538596153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,fp8,0,0.016986666868130367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,16,4,128,1,float16,float16,0,0.0080960001796484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,16,16,128,1,float16,float16,0,0.008325333396593729
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,fp8,0,0.017184000462293625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,16,1,128,1,float16,float16,0,0.007978666573762894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,fp8,0,0.016565332810084026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,16,8,128,1,float16,float16,0,0.008127999802430471
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,16,2,128,1,float16,float16,0,0.008005333443482717
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,16,4,128,1,float16,float16,0,0.008005333443482717
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,fp8,0,0.016906666258970898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,fp8,0,0.017258666455745697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,16,8,128,1,float16,float16,0,0.0080960001796484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,fp8,0,0.016885332763195038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,16,1,128,1,float16,float16,0,0.11760000387827556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,fp8,0,0.02829866607983907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,16,2,128,1,float16,float16,0,0.12146666646003723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,16,4,128,1,float16,float16,0,0.12922666470209757
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,fp8,0,0.0814879983663559
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,fp8,0,0.04549333453178406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,16,8,128,1,float16,float16,0,0.12994666894276938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,16,16,128,1,float16,float16,0,0.06897066533565521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,16,1,128,1,float16,float16,0,0.062218666076660156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,fp8,0,0.021685334543387096
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,16,2,128,1,float16,float16,0,0.06374399860699971
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,fp8,0,0.1067733367284139
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,fp8,0,0.12006933490435283
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,16,16,128,1,float16,float16,0,0.03825599948565165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,fp8,0,0.025146665672461193
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,16,4,128,1,float16,float16,0,0.06781866649786632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,fp8,0,0.045007998744646706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,16,8,128,1,float16,float16,0,0.06816000243028005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,fp8,0,0.05952533086140951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,fp8,0,0.06657066444555919
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,16,1,128,1,float16,float16,0,0.034703999757766724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,16,2,128,1,float16,float16,0,0.0354666660229365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,fp8,0,0.019941333681344986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,fp8,0,0.018458666900793713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,16,16,128,1,float16,float16,0,0.021359999974568684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,16,8,128,1,float16,float16,0,0.037685332198937736
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,fp8,0,0.024586667617162068
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,16,4,128,1,float16,float16,0,0.037658666570981346
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,fp8,0,0.036506667733192444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,16,1,128,1,float16,float16,0,0.01985599969824155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,16,4,128,1,float16,float16,0,0.02093333254257838
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,16,8,128,1,float16,float16,0,0.020986666282018025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,fp8,0,0.019365333020687103
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,fp8,0,0.03323200096686681
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,fp8,0,0.0174346665541331
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,16,2,128,1,float16,float16,0,0.019871999820073444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,16,16,128,1,float16,float16,0,0.012831999609867731
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,16,1,128,1,float16,float16,0,0.012159999459981918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,fp8,0,0.017610666652520496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,fp8,0,0.020506666352351505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,fp8,0,0.016842667013406754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,16,4,128,1,float16,float16,0,0.01249066616098086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,16,2,128,1,float16,float16,0,0.012346666306257248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,fp8,0,0.018810667097568512
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,16,8,128,1,float16,float16,0,0.012453333785136541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,fp8,0,0.016943999876578648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,16,1,128,1,float16,float16,0,0.008277333031098047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,16,2,128,1,float16,float16,0,0.008261333530147871
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,fp8,0,0.017301333447297413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,16,16,128,1,float16,float16,0,0.008714666590094566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,fp8,0,0.01725333308180173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,fp8,0,0.016832000265518825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,16,8,128,1,float16,float16,0,0.008394666636983553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,16,4,128,1,float16,float16,0,0.00847999999920527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,fp8,0,0.016549333930015564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,fp8,0,0.01648533344268799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,16,16,128,1,float16,float16,0,0.008207999790708223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,16,1,128,1,float16,float16,0,0.00808533343176047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,fp8,0,0.016917333006858826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,fp8,0,0.016480000068744022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,fp8,0,0.01701333373785019
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,16,2,128,1,float16,float16,0,0.007882666463653246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,16,4,128,1,float16,float16,0,0.008016000191370646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,fp8,0,0.01720533271630605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,fp8,0,0.016229332735141117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,16,8,128,1,float16,float16,0,0.008047999814152718
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,16,16,128,1,float16,float16,0,0.008080000057816505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,16,1,128,1,float16,float16,0,0.007903999959429106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,fp8,0,0.016458666572968166
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,fp8,0,0.01632000009218852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,16,2,128,1,float16,float16,0,0.00784533346692721
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,fp8,0,0.016821333517630894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,16,4,128,1,float16,float16,0,0.007983999947706858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,fp8,0,0.017018667111794155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,16,16,128,1,float16,float16,0,0.008112000301480293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,16,8,128,1,float16,float16,0,0.007914666707317034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,16,1,128,1,float16,float16,0,0.00784533346692721
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,fp8,0,0.016949333250522614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,16,2,128,1,float16,float16,0,0.007850666840871176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,fp8,0,0.016437333077192307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,fp8,0,0.01659199967980385
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,16,8,128,1,float16,float16,0,0.007850666840871176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,16,4,128,1,float16,float16,0,0.007882666463653246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,fp8,0,0.016906666258970898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,fp8,0,0.01695999999841054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,16,1,128,1,float16,float16,0,0.013264000415802002
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,16,2,128,1,float16,float16,0,0.02534399926662445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,0,0.015722667177518208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,0,0.02146133283774058
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,16,4,128,1,float16,float16,0,0.03942399968703588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,16,8,128,1,float16,float16,0,0.0640533318122228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,0,0.03324799984693527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,16,1,128,1,float16,float16,0,0.009541333342591921
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,16,16,128,1,float16,float16,0,0.05657066901524862
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,16,2,128,1,float16,float16,0,0.017301333447297413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,fp8,0,0.05163733164469401
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,0,0.014688000082969666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,0,0.05274133384227753
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,16,16,128,1,float16,float16,0,0.03214933226505915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,16,4,128,1,float16,float16,0,0.024346667031447094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,fp8,0,0.03169599920511246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,0,0.020714666694402695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,16,8,128,1,float16,float16,0,0.038047999143600464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,16,1,128,1,float16,float16,0,0.009205333267649015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,0,0.03219199925661087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,0,0.012165332833925882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,0,0.0120319997270902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,16,4,128,1,float16,float16,0,0.016607999801635742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,16,16,128,1,float16,float16,0,0.018464000274737675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,16,2,128,1,float16,float16,0,0.013210666676362356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,16,8,128,1,float16,float16,0,0.02367466688156128
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,0,0.014080000420411428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,0,0.020213333268960316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,16,1,128,1,float16,float16,0,0.008869333192706108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,16,2,128,1,float16,float16,0,0.012752000242471695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,0,0.011509332805871964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,fp8,0,0.019893333315849304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,16,4,128,1,float16,float16,0,0.012906666845083237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,0,0.01163200040658315
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,0,0.011653333902359009
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,16,8,128,1,float16,float16,0,0.016613333175579708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,16,16,128,1,float16,float16,0,0.011594666788975397
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,16,1,128,1,float16,float16,0,0.008842666943868002
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,0,0.011343999455372492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,0,0.01370666672786077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,16,2,128,1,float16,float16,0,0.01239466667175293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,fp8,0,0.014106666048367819
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,16,4,128,1,float16,float16,0,0.012693333129088083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,0,0.01137599969903628
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,0,0.011343999455372492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,16,8,128,1,float16,float16,0,0.012800000607967377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,16,1,128,1,float16,float16,0,0.008762666955590248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,16,16,128,1,float16,float16,0,0.008074666683872541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,0,0.015685333559910457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,0,0.011173332730929056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,fp8,0,0.013728000223636627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,16,2,128,1,float16,float16,0,0.012266666938861212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,16,4,128,1,float16,float16,0,0.012479999413092932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,16,16,128,1,float16,float16,0,0.007760000104705493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,0,0.011306667079528173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,16,8,128,1,float16,float16,0,0.012304000556468964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,16,1,128,1,float16,float16,0,0.008623999853928884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,16,2,128,1,float16,float16,0,0.012069333344697952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,16,4,128,1,float16,float16,0,0.012074666718641916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,0,0.011168000598748526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,16,8,128,1,float16,float16,0,0.012223999947309494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,16,16,128,1,float16,float16,0,0.007727999861041705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,fp8,0,0.013365333278973898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,0,0.010757333288590113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,16,2,128,1,float16,float16,0,0.011968000481526056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,16,1,128,1,float16,float16,0,0.008559999987483025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,16,4,128,1,float16,float16,0,0.012085333466529846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,16,8,128,1,float16,float16,0,0.012122667084137598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,16,16,128,1,float16,float16,0,0.007818666597207388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,16,1,128,1,float16,float16,0,0.0084906667470932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,0,0.010672000547250112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,16,2,128,1,float16,float16,0,0.008570666735370954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,fp8,0,0.011434666812419891
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,16,4,128,1,float16,float16,0,0.008645333349704742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,16,8,128,1,float16,float16,0,0.008613333106040955
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,0,0.01156266654531161
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,fp8,0,2.578773339589437
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,fp8,0,5.472970962524414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,fp8,0,1.330074628194173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,12,12,128,1,float16,float16,0,20.444075266520183
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,fp8,0,20.81727472941081
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,fp8,0,2.944752057393392
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,12,1,128,1,float16,float16,0,19.75011698404948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,12,2,128,1,float16,float16,0,20.150836944580078
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,fp8,0,10.148431777954102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,12,12,128,1,float16,float16,0,10.226693471272787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,12,1,128,1,float16,float16,0,39.37800598144531
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,fp8,0,0.706816037495931
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,12,2,128,1,float16,float16,0,40.2256113688151
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,12,4,128,1,float16,float16,0,40.670448303222656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,fp8,0,1.502778689066569
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,12,4,128,1,float16,float16,0,19.85841115315755
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,12,12,128,1,float16,float16,0,4.553663889567058
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,fp8,0,4.992368062337239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,12,1,128,1,float16,float16,0,9.569029490152994
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,fp8,0,42.671915690104164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,fp8,0,0.28437334299087524
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,fp8,0,0.9821920394897461
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,12,2,128,1,float16,float16,0,9.66482162475586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,12,4,128,1,float16,float16,0,9.789967854817709
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,12,1,128,1,float16,float16,0,4.633845329284668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,12,4,128,1,float16,float16,0,4.245375951131185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,12,2,128,1,float16,float16,0,4.597296078999837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,fp8,0,2.60205872853597
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,fp8,0,1.7560213406880696
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,fp8,0,10.92849095662435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,fp8,0,3.625973383585612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,fp8,0,20.79159418741862
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,fp8,0,0.9337493578592936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,fp8,0,12.10964838663737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,12,12,128,1,float16,float16,0,11.695327758789062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,fp8,0,1.9049919446309407
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,12,1,128,1,float16,float16,0,11.372938791910807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,12,2,128,1,float16,float16,0,10.621450424194336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,12,1,128,1,float16,float16,0,22.434969584147137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,fp8,0,5.64361572265625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,12,2,128,1,float16,float16,0,22.690185546875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,12,12,128,1,float16,float16,0,6.050234476725261
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,fp8,0,0.5050826470057169
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,12,4,128,1,float16,float16,0,22.993919372558594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,fp8,0,1.06440536181132
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,12,4,128,1,float16,float16,0,11.769599914550781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,12,1,128,1,float16,float16,0,5.6964162190755205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,fp8,0,25.672154744466145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,fp8,0,3.0813280741373696
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,12,12,128,1,float16,float16,0,2.512042681376139
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,12,2,128,1,float16,float16,0,5.730906804402669
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,12,4,128,1,float16,float16,0,5.423695882161458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,fp8,0,0.19631467262903848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,fp8,0,0.5583360195159912
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,12,1,128,1,float16,float16,0,2.6919520696004233
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,12,2,128,1,float16,float16,0,2.4781972567240396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,fp8,0,1.335045337677002
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,fp8,0,1.7026185989379883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,fp8,0,12.228922526041666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,12,4,128,1,float16,float16,0,2.7425546646118164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,fp8,0,6.162906646728516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,fp8,0,2.7082398732503257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,fp8,0,0.6773706277211508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,12,1,128,1,float16,float16,0,6.960394541422526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,fp8,0,8.225114822387695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,12,12,128,1,float16,float16,0,8.281274795532227
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,fp8,0,1.381050745646159
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,12,2,128,1,float16,float16,0,7.684101104736328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,12,1,128,1,float16,float16,0,15.851706186930338
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,12,2,128,1,float16,float16,0,16.772986094156902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,fp8,0,3.9170347849527993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,12,4,128,1,float16,float16,0,16.438719431559246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,12,12,128,1,float16,float16,0,3.9074665705362954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,fp8,0,0.26205867528915405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,fp8,0,0.783338705698649
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,fp8,0,17.07693862915039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,12,4,128,1,float16,float16,0,8.582090377807617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,12,1,128,1,float16,float16,0,3.605221430460612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,12,2,128,1,float16,float16,0,3.410554567972819
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,12,4,128,1,float16,float16,0,3.3754612604777017
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,fp8,0,0.16772266228993735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,12,12,128,1,float16,float16,0,2.073861281077067
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,12,1,128,1,float16,float16,0,1.9262827237447102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,fp8,0,2.161146640777588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,fp8,0,0.41626667976379395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,12,2,128,1,float16,float16,0,1.796768029530843
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,fp8,0,1.149616003036499
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,12,4,128,1,float16,float16,0,1.723925272623698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,fp8,0,8.427295684814453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,fp8,0,4.103850682576497
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,fp8,0,1.990554650624593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,fp8,0,3.6546827952067056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,fp8,0,0.9993653297424316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,12,1,128,1,float16,float16,0,10.170405069986979
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,12,12,128,1,float16,float16,0,11.095530192057291
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,fp8,0,1.97597869237264
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,fp8,0,10.71902338663737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,12,2,128,1,float16,float16,0,9.863546371459961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,12,1,128,1,float16,float16,0,21.13597361246745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,12,2,128,1,float16,float16,0,21.22430419921875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,fp8,0,5.366250356038411
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,12,4,128,1,float16,float16,0,22.007296244303387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,fp8,0,0.5639946858088175
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,12,12,128,1,float16,float16,0,4.767903963724772
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,12,4,128,1,float16,float16,0,10.428821563720703
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,fp8,0,1.061616023381551
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,fp8,0,23.001502990722656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,12,1,128,1,float16,float16,0,4.558053334554036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,fp8,0,0.20348799228668213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,12,2,128,1,float16,float16,0,4.851429303487142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,fp8,0,2.680805206298828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,12,12,128,1,float16,float16,0,2.364250659942627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,12,4,128,1,float16,float16,0,4.489541371663411
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,fp8,0,0.5518346627553304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,12,1,128,1,float16,float16,0,2.0517120361328125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,12,2,128,1,float16,float16,0,2.0966720581054688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,fp8,0,11.04257583618164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,fp8,0,0.12406933307647705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,12,12,128,1,float16,float16,0,1.385749340057373
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,fp8,0,1.4272373517354329
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,12,4,128,1,float16,float16,0,2.114154656728109
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,fp8,0,0.32603200276692706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,12,1,128,1,float16,float16,0,1.2439253330230713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,12,2,128,1,float16,float16,0,1.3046399752298992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,fp8,0,5.357802708943685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,12,4,128,1,float16,float16,0,1.1410773595174153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,fp8,0,0.7474613189697266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,fp8,0,2.6672585805257163
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,fp8,0,1.3992907206217449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,fp8,0,2.3717652956644693
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,fp8,0,0.6885546843210856
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,12,12,128,1,float16,float16,0,6.034224192301433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,12,1,128,1,float16,float16,0,5.111845334370931
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,fp8,0,5.935152053833008
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,fp8,0,1.2906826337178547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,12,2,128,1,float16,float16,0,4.827973365783691
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,12,1,128,1,float16,float16,0,11.968922932942709
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,fp8,0,3.134496053059896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,12,2,128,1,float16,float16,0,12.215162913004557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,12,4,128,1,float16,float16,0,12.113855997721354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,fp8,0,0.329802672068278
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,12,12,128,1,float16,float16,0,2.75056521097819
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,fp8,0,0.7177120049794515
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,12,4,128,1,float16,float16,0,5.770240147908528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,12,1,128,1,float16,float16,0,2.5600852966308594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,fp8,0,13.212165832519531
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,fp8,0,0.14381866653760275
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,12,12,128,1,float16,float16,0,1.4276746114095051
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,12,4,128,1,float16,float16,0,2.445221265157064
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,12,2,128,1,float16,float16,0,2.7375094095865884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,12,1,128,1,float16,float16,0,1.2243999640146892
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,fp8,0,1.5517813364664714
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,fp8,0,0.3011893431345622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,12,2,128,1,float16,float16,0,1.273802677790324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,fp8,0,6.336442947387695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,12,12,128,1,float16,float16,0,0.7336053053538004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,12,4,128,1,float16,float16,0,1.5211307207743328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,fp8,0,0.9625493685404459
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,12,1,128,1,float16,float16,0,0.6411306858062744
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,fp8,0,0.08654399712880452
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,fp8,0,3.109413464864095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,fp8,0,0.19986667235692343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,12,2,128,1,float16,float16,0,0.7715946833292643
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,fp8,0,1.4879627227783203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,12,4,128,1,float16,float16,0,0.7198346455891927
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,fp8,0,0.5459733406702677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,fp8,0,1.736512025197347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,fp8,0,2.6857172648111978
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,12,12,128,1,float16,float16,0,5.845663706461589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,fp8,0,6.093637466430664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,12,1,128,1,float16,float16,0,4.1183732350667315
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,fp8,0,0.9090027014414469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,12,1,128,1,float16,float16,0,11.22222900390625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,12,2,128,1,float16,float16,0,11.824031829833984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,fp8,0,1.360703945159912
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,12,4,128,1,float16,float16,0,11.995770772298178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,12,2,128,1,float16,float16,0,4.969104131062825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,fp8,0,0.4381226698557536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,12,12,128,1,float16,float16,0,2.952906608581543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,12,1,128,1,float16,float16,0,2.308314641316732
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,fp8,0,2.998101234436035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,fp8,0,11.738255818684896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,fp8,0,0.6924373308817545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,12,4,128,1,float16,float16,0,4.813653310139974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,12,2,128,1,float16,float16,0,2.1791359583536782
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,fp8,0,5.537893295288086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,fp8,0,1.5188266436258953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,12,4,128,1,float16,float16,0,2.359978675842285
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,fp8,0,0.13593066732088724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,12,1,128,1,float16,float16,0,1.0926720301310222
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,fp8,0,0.3278613289197286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,12,2,128,1,float16,float16,0,1.2049226760864258
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,fp8,0,2.7874558766682944
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,12,12,128,1,float16,float16,0,1.433791955312093
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,12,4,128,1,float16,float16,0,1.1857013702392578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,fp8,0,0.7937013308207194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,fp8,0,0.07084799806276958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,12,1,128,1,float16,float16,0,0.6141653458277384
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,12,12,128,1,float16,float16,0,0.7272533575693766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,fp8,0,0.44369598229726154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,12,4,128,1,float16,float16,0,0.60426131884257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,12,2,128,1,float16,float16,0,0.6817866961161295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,fp8,0,0.15436800320943198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,12,12,128,1,float16,float16,0,0.3452586730321248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,fp8,0,0.05666666726271311
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,12,1,128,1,float16,float16,0,0.3239893317222595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,12,2,128,1,float16,float16,0,0.3349386850992839
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,fp8,0,0.11038399736086528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,12,4,128,1,float16,float16,0,0.326693336168925
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,fp8,0,0.2759360074996948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,fp8,0,1.3677813212076824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,fp8,0,0.7463839848836263
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,fp8,0,1.249509334564209
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,fp8,0,1.9400746027628581
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,12,12,128,1,float16,float16,0,3.5765441258748374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,fp8,0,3.943530718485514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,fp8,0,0.6006240049997965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,12,2,128,1,float16,float16,0,5.963696161905925
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,12,1,128,1,float16,float16,0,2.48854398727417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,12,1,128,1,float16,float16,0,5.736176172892253
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,12,4,128,1,float16,float16,0,5.952554702758789
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,fp8,0,0.9159146944681803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,fp8,0,0.25017066796620685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,fp8,0,6.972474416097005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,12,2,128,1,float16,float16,0,2.530080000559489
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,12,1,128,1,float16,float16,0,1.4432746569315593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,12,12,128,1,float16,float16,0,1.6726667086283367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,fp8,0,1.9356427192687988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,fp8,0,0.4931679964065552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,12,4,128,1,float16,float16,0,2.916442553202311
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,12,2,128,1,float16,float16,0,1.3593866030375164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,12,12,128,1,float16,float16,0,0.8598453203837076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,fp8,0,3.1816161473592124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,fp8,0,0.08914666374524434
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,fp8,0,1.0117440223693848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,12,4,128,1,float16,float16,0,1.4411786397298176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,12,1,128,1,float16,float16,0,0.6820639769236246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,12,2,128,1,float16,float16,0,0.681984027226766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,fp8,0,0.16994667053222656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,fp8,0,1.5375733375549316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,12,12,128,1,float16,float16,0,0.4278293450673421
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,fp8,0,0.049786667029062905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,12,4,128,1,float16,float16,0,0.7466453711191813
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,fp8,0,0.4822400013605754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,12,1,128,1,float16,float16,0,0.3703840176264445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,fp8,0,0.1251306633154551
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,12,2,128,1,float16,float16,0,0.3679200013478597
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,12,12,128,1,float16,float16,0,0.22127999862035116
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,fp8,0,0.26929599046707153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,12,1,128,1,float16,float16,0,0.21870400508244833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,fp8,0,0.815770705540975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,12,4,128,1,float16,float16,0,0.3660586675008138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,fp8,0,0.04387733340263367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,fp8,0,0.4777919848759969
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,fp8,0,0.07672533392906189
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,12,4,128,1,float16,float16,0,0.21354132890701294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,12,2,128,1,float16,float16,0,0.20820266008377075
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,fp8,0,0.21609600385030112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,12,12,128,1,float16,float16,0,3.5596745808919272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,fp8,0,1.601194699605306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,fp8,0,4.224464098612468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,fp8,0,2.2112852732340493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,12,4,128,1,float16,float16,0,5.395994822184245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,fp8,0,0.7775519688924154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,12,1,128,1,float16,float16,0,5.079183896382649
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,12,2,128,1,float16,float16,0,5.077040036519368
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,fp8,0,6.7640533447265625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,12,1,128,1,float16,float16,0,2.2801705996195474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,fp8,0,1.0938186645507812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,12,2,128,1,float16,float16,0,2.4164907137552896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,12,4,128,1,float16,float16,0,2.9346081415812173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,12,1,128,1,float16,float16,0,1.1498613357543945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,fp8,0,2.049743970235189
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,fp8,0,0.37860266367594403
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,12,12,128,1,float16,float16,0,1.7249867121378581
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,12,2,128,1,float16,float16,0,1.2329333623250325
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,fp8,0,0.5217653512954712
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,fp8,0,3.125589370727539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,fp8,0,0.07611733178297679
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,12,1,128,1,float16,float16,0,0.6207253138224283
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,12,2,128,1,float16,float16,0,0.635482668876648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,12,12,128,1,float16,float16,0,0.8887253602345785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,fp8,0,1.02565336227417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,fp8,0,1.5444533030192058
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,fp8,0,0.18569066127141318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,12,4,128,1,float16,float16,0,1.4143733978271484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,12,4,128,1,float16,float16,0,0.707541306813558
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,12,12,128,1,float16,float16,0,0.44019198417663574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,12,1,128,1,float16,float16,0,0.32226133346557617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,fp8,0,0.04709866642951965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,12,2,128,1,float16,float16,0,0.32102932532628375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,fp8,0,0.09152000149091084
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,fp8,0,0.4936213493347168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,12,4,128,1,float16,float16,0,0.33535468578338623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,12,12,128,1,float16,float16,0,0.19337600469589233
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,fp8,0,0.2646613319714864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,fp8,0,0.8136800130208334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,fp8,0,0.03791466603676478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,12,1,128,1,float16,float16,0,0.18054932355880737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,12,4,128,1,float16,float16,0,0.19545066356658936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,12,2,128,1,float16,float16,0,0.18247467279434204
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,fp8,0,0.06061333417892456
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,fp8,0,0.40086400508880615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,fp8,0,0.14677332838376364
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,12,1,128,1,float16,float16,0,0.10867733756701152
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,12,2,128,1,float16,float16,0,0.1074773371219635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,fp8,0,0.03395200024048487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,12,12,128,1,float16,float16,0,0.11412266890207927
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,fp8,0,0.0521066685517629
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,fp8,0,0.2732853293418884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,12,4,128,1,float16,float16,0,0.11191466450691223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,fp8,0,0.09858666857083638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,fp8,0,1.1890986760457356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,12,12,128,1,float16,float16,0,2.342965284983317
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,12,4,128,1,float16,float16,0,3.623413403828939
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,12,1,128,1,float16,float16,0,2.896426518758138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,fp8,0,2.819642702738444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,12,2,128,1,float16,float16,0,3.031248092651367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,12,1,128,1,float16,float16,0,1.3761439323425293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,fp8,0,0.7615359624226888
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,fp8,0,1.5650506019592285
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,12,4,128,1,float16,float16,0,1.658522605895996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,fp8,0,0.5602773427963257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,12,2,128,1,float16,float16,0,1.5308106740315754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,12,12,128,1,float16,float16,0,1.149733304977417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,fp8,0,0.22926932573318481
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,fp8,0,3.9803412755330405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,12,12,128,1,float16,float16,0,0.553274671236674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,12,2,128,1,float16,float16,0,0.7609600226084391
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,fp8,0,0.3599040110905965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,12,4,128,1,float16,float16,0,0.8772853215535482
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,fp8,0,0.6422239939371744
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,12,1,128,1,float16,float16,0,0.7356746991475424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,fp8,0,1.3924585978190105
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,fp8,0,1.979925314585368
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,fp8,0,0.06723733246326447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,fp8,0,0.11710932850837708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,12,1,128,1,float16,float16,0,0.36106133460998535
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,12,2,128,1,float16,float16,0,0.4025973478953044
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,fp8,0,0.9465440114339193
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,12,4,128,1,float16,float16,0,0.43621333440144855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,12,12,128,1,float16,float16,0,0.27243733406066895
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,fp8,0,0.2844853401184082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,fp8,0,0.03477866699298223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,fp8,0,0.07727466523647308
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,12,2,128,1,float16,float16,0,0.20177600781122842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,12,4,128,1,float16,float16,0,0.2141546607017517
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,12,12,128,1,float16,float16,0,0.12045333782831828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,fp8,0,0.16114133596420288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,fp8,0,0.44234132766723633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,12,1,128,1,float16,float16,0,0.19850132862726846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,12,1,128,1,float16,float16,0,0.11814933021863301
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,fp8,0,0.028789333999156952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,12,4,128,1,float16,float16,0,0.11949867010116577
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,12,2,128,1,float16,float16,0,0.1157919963200887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,fp8,0,0.26842667659123737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,fp8,0,0.1195146640141805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,fp8,0,0.046037331223487854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,fp8,0,0.02537599951028824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,12,2,128,1,float16,float16,0,0.07763200004895528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,12,4,128,1,float16,float16,0,0.08222400148709615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,fp8,0,0.1570133368174235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,fp8,0,0.040522667268911995
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,12,1,128,1,float16,float16,0,0.07725333174069722
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,12,12,128,1,float16,float16,0,0.08448533217112224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,fp8,0,0.07257066667079926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,12,12,128,1,float16,float16,0,2.522576014200846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,fp8,0,1.5621226628621419
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,12,1,128,1,float16,float16,0,2.77509339650472
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,fp8,0,1.991487979888916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,12,2,128,1,float16,float16,0,3.0812479654947915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,12,1,128,1,float16,float16,0,1.353941281636556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,fp8,0,0.7654186884562174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,fp8,0,4.102474530537923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,12,4,128,1,float16,float16,0,3.4339253107706704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,fp8,0,3.1885598500569663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,12,2,128,1,float16,float16,0,1.4623093605041504
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,fp8,0,0.9692160288492838
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,12,12,128,1,float16,float16,0,1.2569279670715332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,12,4,128,1,float16,float16,0,1.7045173645019531
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,fp8,0,0.3471999963124593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,fp8,0,1.6007572809855144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,12,2,128,1,float16,float16,0,0.7498772939046224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,fp8,0,0.45686399936676025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,12,1,128,1,float16,float16,0,0.37532798449198407
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,12,4,128,1,float16,float16,0,0.878485361735026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,12,12,128,1,float16,float16,0,0.6105600198109945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,12,2,128,1,float16,float16,0,0.39602665106455487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,fp8,0,0.06053866446018219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,fp8,0,0.15454399585723877
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,fp8,0,0.9824159940083822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,12,12,128,1,float16,float16,0,0.31385066111882526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,12,4,128,1,float16,float16,0,0.4638400077819824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,12,1,128,1,float16,float16,0,0.18344000975290933
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,12,1,128,1,float16,float16,0,0.6913332939147949
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,fp8,0,0.3396639823913574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,fp8,0,0.7593866984049479
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,fp8,0,0.49396800994873047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,12,2,128,1,float16,float16,0,0.18893333276112875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,fp8,0,0.036117332677046456
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,fp8,0,0.06555733581384023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,12,1,128,1,float16,float16,0,0.100490669409434
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,fp8,0,2.083445390065511
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,fp8,0,0.16756266355514526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,fp8,0,0.027893332143624622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,fp8,0,0.23704000314076742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,12,4,128,1,float16,float16,0,0.2065920035044352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,fp8,0,0.04151466737190882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,12,4,128,1,float16,float16,0,0.11516799529393514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,fp8,0,0.10152000188827515
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,12,12,128,1,float16,float16,0,0.07425599793593089
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,12,2,128,1,float16,float16,0,0.1049066682656606
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,fp8,0,0.15811199943224588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,12,1,128,1,float16,float16,0,0.05974400043487549
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,fp8,0,0.022848000129063923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,12,2,128,1,float16,float16,0,0.06440000236034393
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,fp8,0,0.03603200117746989
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,12,4,128,1,float16,float16,0,0.07347733279069264
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,12,1,128,1,float16,float16,0,0.03623999903599421
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,fp8,0,0.05898666878541311
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,12,12,128,1,float16,float16,0,0.11423466602961223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,fp8,0,0.04333333174387614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,12,2,128,1,float16,float16,0,0.03806933263937632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,12,12,128,1,float16,float16,0,0.03984000037113825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,12,4,128,1,float16,float16,0,0.03912533322970072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,fp8,0,0.09438932935396831
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,fp8,0,0.05526400109132131
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,fp8,0,0.06517333288987477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,12,12,128,1,float16,float16,0,2.407013257344564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,fp8,0,3.140080134073893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,fp8,0,1.5655840237935383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,12,1,128,1,float16,float16,0,2.4507039388020835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,12,1,128,1,float16,float16,0,0.8878133296966553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,12,2,128,1,float16,float16,0,2.635152022043864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,fp8,0,0.8286933104197184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,fp8,0,1.9620320002237956
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,12,2,128,1,float16,float16,0,1.0258453687032063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,fp8,0,0.9579626719156901
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,12,4,128,1,float16,float16,0,3.1459840138753257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,fp8,0,2.867786725362142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,fp8,0,1.4081546465555828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,12,12,128,1,float16,float16,0,1.3137973149617512
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,fp8,0,0.3442293405532837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,12,1,128,1,float16,float16,0,0.4511200189590454
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,fp8,0,1.5154773394266765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,12,4,128,1,float16,float16,0,1.330672025680542
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,12,12,128,1,float16,float16,0,0.59224534034729
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,12,1,128,1,float16,float16,0,0.24121065934499106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,fp8,0,0.6921652952829996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,12,4,128,1,float16,float16,0,0.663429339726766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,12,2,128,1,float16,float16,0,0.5147413412729899
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,fp8,0,0.4161440134048462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,fp8,0,0.6265386740366617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,fp8,0,0.11129066348075867
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,12,2,128,1,float16,float16,0,0.26394667228062946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,12,4,128,1,float16,float16,0,0.3256959915161133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,12,12,128,1,float16,float16,0,0.2411200006802877
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,fp8,0,0.2630133430163066
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,fp8,0,0.05345066885153452
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,fp8,0,0.03279466678698858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,12,4,128,1,float16,float16,0,0.1395680010318756
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,12,12,128,1,float16,float16,0,0.07633066674073537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,fp8,0,0.048298666874567665
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,fp8,0,0.12569066882133484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,fp8,0,0.34541865189870197
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,12,2,128,1,float16,float16,0,0.1220960021018982
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,12,1,128,1,float16,float16,0,0.06425066788991292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,fp8,0,0.024469333390394848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,12,2,128,1,float16,float16,0,0.07125866909821828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,fp8,0,0.15504533052444458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,12,4,128,1,float16,float16,0,0.08109866579373677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,12,1,128,1,float16,float16,0,0.11257066329320271
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,fp8,0,0.06300800045331319
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,fp8,0,0.032111999889214836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,fp8,0,0.020053333292404812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,12,12,128,1,float16,float16,0,0.04252799848715464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,fp8,0,0.10062932968139648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,fp8,0,0.025173333783944447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,12,2,128,1,float16,float16,0,0.03821333249409994
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,12,1,128,1,float16,float16,0,0.03590933233499527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,12,4,128,1,float16,float16,0,0.04191466669241587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,fp8,0,0.04232533276081085
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,12,1,128,1,float16,float16,0,0.024192000428835552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,12,12,128,1,float16,float16,0,0.027813332776228588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,fp8,0,0.07005333403746287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,12,2,128,1,float16,float16,0,0.025301332275072735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,fp8,0,0.04019733270009359
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,fp8,0,0.04516266783078512
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,12,4,128,1,float16,float16,0,0.027119999130566914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,fp8,0,0.051781331499417625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,12,12,128,1,float16,float16,0,0.01844266677896182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,fp8,0,0.029189333319664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,12,2,128,1,float16,float16,0,0.01695999999841054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,12,1,128,1,float16,float16,0,0.01691199963291486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,fp8,0,0.029829333225886028
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,fp8,0,0.04049066702524821
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,12,4,128,1,float16,float16,0,0.018191999445358913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,fp8,0,0.03161599983771642
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,12,1,128,1,float16,float16,0,0.7288586298624674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,fp8,0,0.9440693060557047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,fp8,0,0.7676746845245361
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,12,2,128,1,float16,float16,0,0.9502773284912109
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,12,12,128,1,float16,float16,0,1.2173173427581787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,12,4,128,1,float16,float16,0,1.3029653231302898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,fp8,0,1.4262773195902507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,12,1,128,1,float16,float16,0,0.33400531609853107
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,fp8,0,0.3450719912846883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,12,2,128,1,float16,float16,0,0.4243466854095459
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,fp8,0,0.4108266830444336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,12,1,128,1,float16,float16,0,0.17525867621103922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,fp8,0,0.5658400058746338
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,fp8,0,0.05090666810671488
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,12,12,128,1,float16,float16,0,0.5944213469823202
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,fp8,0,0.6183093388875326
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,fp8,0,0.08541333675384521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,12,2,128,1,float16,float16,0,0.19951466719309488
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,fp8,0,0.23443732659022012
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,12,1,128,1,float16,float16,0,0.08004266520341237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,12,4,128,1,float16,float16,0,0.5949866771697998
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,fp8,0,0.03012799968322118
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,fp8,0,0.043935999274253845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,fp8,0,0.25756265719731647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,12,2,128,1,float16,float16,0,0.08880533774693807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,12,12,128,1,float16,float16,0,0.22220800320307413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,12,4,128,1,float16,float16,0,0.2738560040791829
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,fp8,0,0.097461332877477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,12,12,128,1,float16,float16,0,0.05726933479309082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,12,1,128,1,float16,float16,0,0.04363200068473816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,12,4,128,1,float16,float16,0,0.10428266723950703
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,fp8,0,0.05032533407211304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,fp8,0,0.1097866694132487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,fp8,0,0.028378665447235107
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,fp8,0,0.02107200026512146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,12,2,128,1,float16,float16,0,0.047322665651639305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,12,4,128,1,float16,float16,0,0.055306668082873024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,12,12,128,1,float16,float16,0,0.03278933217128118
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,fp8,0,0.016762666404247284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,12,1,128,1,float16,float16,0,0.025946666797002155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,12,2,128,1,float16,float16,0,0.02794666588306427
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,fp8,0,0.021738665799299877
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,fp8,0,0.06585066517194112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,fp8,0,1.3178826967875164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,12,1,128,1,float16,float16,0,0.017338667064905167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,12,12,128,1,float16,float16,0,0.020597333709398907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,12,4,128,1,float16,float16,0,0.03180799881617228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,fp8,0,0.037248000502586365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,fp8,0,0.06041066845258077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,fp8,0,0.03222399950027466
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,12,4,128,1,float16,float16,0,0.020117333779732387
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,12,1,128,1,float16,float16,0,0.011994666109482447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,12,12,128,1,float16,float16,0,0.013477332890033722
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,fp8,0,0.045034666856129967
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,fp8,0,0.026234666506449383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,fp8,0,0.03654933224121729
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,12,2,128,1,float16,float16,0,0.01812800019979477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,12,2,128,1,float16,float16,0,0.012159999459981918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,fp8,0,0.029317334294319153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,12,4,128,1,float16,float16,0,0.013157332936922709
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,12,12,128,1,float16,float16,0,0.011850666254758835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,fp8,0,0.03124266614516576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,12,2,128,1,float16,float16,0,0.011525332927703857
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,12,1,128,1,float16,float16,0,0.011477333803971609
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,fp8,0,0.024288001159826916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,fp8,0,0.020576000213623047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,fp8,0,0.02366400013367335
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,12,4,128,1,float16,float16,0,0.01179733375708262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,fp8,0,0.02386133372783661
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,fp8,0,0.04164800047874451
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,12,1,128,1,float16,float16,0,0.3377973238627116
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,12,2,128,1,float16,float16,0,0.4867039918899536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,fp8,0,0.40941866238911945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,fp8,0,0.34195200602213544
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,12,4,128,1,float16,float16,0,0.598144014676412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,fp8,0,0.616869330406189
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,12,12,128,1,float16,float16,0,0.5943573315938314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,fp8,0,0.4771626790364583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,12,1,128,1,float16,float16,0,0.16030933459599814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,12,2,128,1,float16,float16,0,0.18331732352574667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,fp8,0,0.05045866469542185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,fp8,0,0.08527466654777527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,12,4,128,1,float16,float16,0,0.2688266634941101
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,12,1,128,1,float16,float16,0,0.06348266700903575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,12,2,128,1,float16,float16,0,0.07131200035413106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,fp8,0,0.030000001192092896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,12,12,128,1,float16,float16,0,0.21985600392023721
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,12,12,128,1,float16,float16,0,0.04881600042184194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,fp8,0,0.24388800064722696
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,fp8,0,0.08583999673525493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,fp8,0,0.040906667709350586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,12,4,128,1,float16,float16,0,0.08796266714731853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,fp8,0,0.08721066514650981
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,12,1,128,1,float16,float16,0,0.03569599986076355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,12,4,128,1,float16,float16,0,0.04738133152325948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,fp8,0,0.21237866083780924
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,fp8,0,0.02584533393383026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,fp8,0,0.04483733574549357
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,fp8,0,0.021216000119845074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,fp8,0,0.046997333566347756
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,12,2,128,1,float16,float16,0,0.03961066653331121
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,12,1,128,1,float16,float16,0,0.02145066608985265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,12,12,128,1,float16,float16,0,0.027962667246659596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,12,2,128,1,float16,float16,0,0.02293866624434789
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,fp8,0,0.018725333114465077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,fp8,0,0.02847466617822647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,12,4,128,1,float16,float16,0,0.026906666656335194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,fp8,0,0.01434133326013883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,fp8,0,0.028698667883872986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,12,12,128,1,float16,float16,0,0.01806933308641116
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,12,2,128,1,float16,float16,0,0.015423999478419622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,fp8,0,0.015386667102575302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,12,1,128,1,float16,float16,0,0.014538666854302088
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,12,4,128,1,float16,float16,0,0.01732800031701724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,12,12,128,1,float16,float16,0,0.011066666493813196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,12,1,128,1,float16,float16,0,0.009599999835093817
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,fp8,0,0.021744000415007275
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,fp8,0,0.013471999516089758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,12,4,128,1,float16,float16,0,0.010725333044926325
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,12,2,128,1,float16,float16,0,0.009797333429257074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,fp8,0,0.018250666558742523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,fp8,0,0.013637332866589228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,fp8,0,0.01693333312869072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,12,12,128,1,float16,float16,0,0.009349333122372627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,12,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,fp8,0,0.01685333376129468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,12,4,128,1,float16,float16,0,0.009205333267649015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,12,2,128,1,float16,float16,0,0.009018666421373686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,fp8,0,0.01687466725707054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,12,12,128,1,float16,float16,0,0.008714666590094566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,fp8,0,0.016186666985352833
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,12,1,128,1,float16,float16,0,0.008581333483258883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,fp8,0,0.012634667257467905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,12,2,128,1,float16,float16,0,0.008645333349704742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,fp8,0,0.012752000242471695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,fp8,0,0.016421332955360413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,12,4,128,1,float16,float16,0,0.008725333337982496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,12,12,128,1,float16,float16,0,0.21653334299723306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,12,2,128,1,float16,float16,0,0.1832159956296285
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,fp8,0,0.08987200260162354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,fp8,0,0.05162666738033295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,fp8,0,0.19325333833694458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,12,1,128,1,float16,float16,0,0.1583093305428823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,12,4,128,1,float16,float16,0,0.2671839992205302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,fp8,0,0.23267199595769247
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,12,12,128,1,float16,float16,0,0.046256000796953835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,12,4,128,1,float16,float16,0,0.0824533353249232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,fp8,0,0.07750399907430013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,12,2,128,1,float16,float16,0,0.06664533416430156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,fp8,0,0.03035733352104823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,fp8,0,0.06235733131567637
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,12,1,128,1,float16,float16,0,0.05884266893068949
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,fp8,0,0.04203199843565623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,12,1,128,1,float16,float16,0,0.03356799980004629
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,fp8,0,0.02107200026512146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,12,2,128,1,float16,float16,0,0.03734400123357773
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,12,4,128,1,float16,float16,0,0.04494399825731913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,fp8,0,0.025797332326571148
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,fp8,0,0.041936000188191734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,12,12,128,1,float16,float16,0,0.026026666164398193
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,fp8,0,0.01658133293191592
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,12,2,128,1,float16,float16,0,0.021274665991465252
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,12,1,128,1,float16,float16,0,0.019610666980346043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,12,4,128,1,float16,float16,0,0.02515733242034912
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,fp8,0,0.018581333259741466
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,fp8,0,0.025829332570234936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,12,1,128,1,float16,float16,0,0.013829333086808523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,fp8,0,0.036858665446440377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,fp8,0,0.02229333420594533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,12,2,128,1,float16,float16,0,0.014639999717473984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,fp8,0,0.014352000008026758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,12,12,128,1,float16,float16,0,0.01729600007335345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,fp8,0,0.015392000476519266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,12,4,128,1,float16,float16,0,0.016672000288963318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,12,1,128,1,float16,float16,0,0.009290666629870733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,fp8,0,0.018346666047970455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,fp8,0,0.013487999637921652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,12,2,128,1,float16,float16,0,0.009423999736706415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,fp8,0,0.015386667102575302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,fp8,0,0.013455999394257864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,12,4,128,1,float16,float16,0,0.010431999961535135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,12,12,128,1,float16,float16,0,0.010911999891201654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,12,12,128,1,float16,float16,0,0.00922133338948091
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,fp8,0,0.014858666807413101
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,12,1,128,1,float16,float16,0,0.00877333308259646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,12,2,128,1,float16,float16,0,0.008698666468262672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,12,4,128,1,float16,float16,0,0.009482666850090027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,12,12,128,1,float16,float16,0,0.008805333326260248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,12,1,128,1,float16,float16,0,0.008405333384871483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,fp8,0,0.012442667037248611
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,12,2,128,1,float16,float16,0,0.00850133349498113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,fp8,0,0.013317332913478216
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,12,4,128,1,float16,float16,0,0.008549333239595095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,fp8,0,0.0124746672809124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,fp8,0,0.012719999998807907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,12,1,128,1,float16,float16,0,0.008383999889095625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,fp8,0,0.012234666695197424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,12,2,128,1,float16,float16,0,0.008341333518425623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,fp8,0,0.012703999876976013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,12,12,128,1,float16,float16,0,0.008586666857202848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,12,4,128,1,float16,float16,0,0.008421333506703377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,fp8,0,0.012634667257467905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,fp8,0,0.012426666915416718
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,12,12,128,1,float16,float16,0,0.0643039991458257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,12,1,128,1,float16,float16,0,0.09773866335550944
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,fp8,0,0.035317334036032356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,12,2,128,1,float16,float16,0,0.10619733730951945
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,12,4,128,1,float16,float16,0,0.12084800004959106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,fp8,0,0.10794132947921753
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,fp8,0,0.05595199763774872
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,12,1,128,1,float16,float16,0,0.05146666864554087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,fp8,0,0.10054933031400044
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,fp8,0,0.025098666548728943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,12,2,128,1,float16,float16,0,0.0553706685702006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,fp8,0,0.030997333427270252
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,12,4,128,1,float16,float16,0,0.06286400059858958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,fp8,0,0.05681066711743673
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,12,12,128,1,float16,float16,0,0.03610666592915853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,12,1,128,1,float16,float16,0,0.029701332251230877
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,fp8,0,0.055455997586250305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,fp8,0,0.02063999945918719
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,12,2,128,1,float16,float16,0,0.03139200061559677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,fp8,0,0.022677332162857056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,12,12,128,1,float16,float16,0,0.0204373337328434
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,12,4,128,1,float16,float16,0,0.035418666899204254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,fp8,0,0.017968000223239262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,12,2,128,1,float16,float16,0,0.017914666483799618
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,fp8,0,0.03091199944416682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,12,4,128,1,float16,float16,0,0.019909333437681198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,12,1,128,1,float16,float16,0,0.01722666621208191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,fp8,0,0.02310933421055476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,fp8,0,0.036917333801587425
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,12,12,128,1,float16,float16,0,0.014240000396966934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,fp8,0,0.017887999614079792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,12,2,128,1,float16,float16,0,0.012826666235923767
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,fp8,0,0.020421333611011505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,12,4,128,1,float16,float16,0,0.013818666338920593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,12,1,128,1,float16,float16,0,0.012730666746695837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,12,1,128,1,float16,float16,0,0.008522666369875273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,fp8,0,0.018863999595244724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,fp8,0,0.01741333305835724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,12,2,128,1,float16,float16,0,0.008650666723648706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,12,12,128,1,float16,float16,0,0.009098666409651438
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,fp8,0,0.017450666675964992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,12,4,128,1,float16,float16,0,0.008992000172535578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,fp8,0,0.01684800038735072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,12,1,128,1,float16,float16,0,0.008192000289758047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,12,12,128,1,float16,float16,0,0.008453333129485449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,fp8,0,0.0163680004576842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,12,2,128,1,float16,float16,0,0.008298666526873907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,12,12,128,1,float16,float16,0,0.00821333316465219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,12,4,128,1,float16,float16,0,0.008373333141207695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,fp8,0,0.01720533271630605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,fp8,0,0.01692266638080279
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,fp8,0,0.016314666718244553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,12,1,128,1,float16,float16,0,0.008069333309928576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,12,4,128,1,float16,float16,0,0.008080000057816505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,12,2,128,1,float16,float16,0,0.008037333066264788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,12,12,128,1,float16,float16,0,0.008026666939258575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,fp8,0,0.016970666746298473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,12,2,128,1,float16,float16,0,0.007930666829148928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,12,4,128,1,float16,float16,0,0.00808533343176047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,fp8,0,0.016117333124081295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,12,1,128,1,float16,float16,0,0.00808533343176047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,fp8,0,0.01691199963291486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,fp8,0,0.016602666427691776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,12,12,128,1,float16,float16,0,0.05431999762852987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,12,1,128,1,float16,float16,0,0.0906826655069987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,12,2,128,1,float16,float16,0,0.09447999795277913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,fp8,0,0.02683199942111969
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,12,4,128,1,float16,float16,0,0.10236266255378723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,fp8,0,0.04403733213742574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,fp8,0,0.07827199995517731
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,fp8,0,0.08413333694140117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,12,1,128,1,float16,float16,0,0.04773333172003428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,fp8,0,0.020970667401949566
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,12,2,128,1,float16,float16,0,0.04941866795221964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,fp8,0,0.024682665864626568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,12,4,128,1,float16,float16,0,0.05340266724427541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,fp8,0,0.043151999513308205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,12,12,128,1,float16,float16,0,0.030762667457262676
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,12,1,128,1,float16,float16,0,0.027535999814669292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,fp8,0,0.01798933371901512
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,fp8,0,0.04693333307902018
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,12,2,128,1,float16,float16,0,0.02828799933195114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,fp8,0,0.019808000574509304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,12,4,128,1,float16,float16,0,0.030394665896892548
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,12,12,128,1,float16,float16,0,0.017637333522240322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,fp8,0,0.023792001108328503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,fp8,0,0.0169813334941864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,fp8,0,0.03181866556406021
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,12,2,128,1,float16,float16,0,0.016234666109085083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,fp8,0,0.017231999586025875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,12,1,128,1,float16,float16,0,0.016261332978804905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,12,12,128,1,float16,float16,0,0.012629333883523941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,12,1,128,1,float16,float16,0,0.01219733307758967
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,12,4,128,1,float16,float16,0,0.01738133281469345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,fp8,0,0.017573333034912746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,fp8,0,0.018853332847356796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,12,2,128,1,float16,float16,0,0.012122667084137598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,fp8,0,0.018085333208243053
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,fp8,0,0.016666666915019352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,12,4,128,1,float16,float16,0,0.01239466667175293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,12,12,128,1,float16,float16,0,0.008522666369875273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,fp8,0,0.01674666628241539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,12,2,128,1,float16,float16,0,0.008277333031098047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,12,1,128,1,float16,float16,0,0.008272000278035799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,fp8,0,0.016496000190575916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,12,12,128,1,float16,float16,0,0.00814933329820633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,fp8,0,0.01632000009218852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,12,4,128,1,float16,float16,0,0.008469333251317343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,12,2,128,1,float16,float16,0,0.007962666451931
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,fp8,0,0.01692266638080279
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,12,1,128,1,float16,float16,0,0.007978666573762894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,12,4,128,1,float16,float16,0,0.008031999692320824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,fp8,0,0.0161920003592968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,fp8,0,0.01701333373785019
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,12,12,128,1,float16,float16,0,0.007983999947706858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,12,1,128,1,float16,float16,0,0.007850666840871176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,12,2,128,1,float16,float16,0,0.007877333089709282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,12,4,128,1,float16,float16,0,0.00795199970404307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,fp8,0,0.016805333395799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,fp8,0,0.016303999970356624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,fp8,0,0.016895999511082966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,fp8,0,0.016879999389251072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,12,1,128,1,float16,float16,0,0.007802666475375493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,fp8,0,0.016906666258970898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,12,2,128,1,float16,float16,0,0.007791999727487564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,fp8,0,0.01621333385507266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,fp8,0,0.016810666769742966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,12,12,128,1,float16,float16,0,0.0080960001796484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,12,4,128,1,float16,float16,0,0.007893333211541176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,fp8,0,0.01684800038735072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,12,12,128,1,float16,float16,0,0.04376000165939331
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,fp8,0,0.04081599911053976
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,12,1,128,1,float16,float16,0,0.013471999516089758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,12,2,128,1,float16,float16,0,0.025050667424996693
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,0,0.021226666867733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,12,4,128,1,float16,float16,0,0.039317332208156586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,0,0.03294399877389272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,12,1,128,1,float16,float16,0,0.009461333354314169
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,0,0.012378666549921036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,12,12,128,1,float16,float16,0,0.025605333348115284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,12,2,128,1,float16,float16,0,0.01708799973130226
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,12,1,128,1,float16,float16,0,0.009039999917149544
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,fp8,0,0.02587199956178665
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,12,4,128,1,float16,float16,0,0.02426133304834366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,0,0.014314666390419006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,0,0.020400000115235645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,0,0.011744000017642975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,12,2,128,1,float16,float16,0,0.013077333569526672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,0,0.011882666498422623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,12,4,128,1,float16,float16,0,0.016544000556071598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,12,1,128,1,float16,float16,0,0.008879999940594038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,12,12,128,1,float16,float16,0,0.015050667027632395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,0,0.01402666668097178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,0,0.011557333171367645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,12,2,128,1,float16,float16,0,0.012549333274364471
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,12,12,128,1,float16,float16,0,0.011605333536863327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,12,4,128,1,float16,float16,0,0.012784000486135483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,12,1,128,1,float16,float16,0,0.00890666681031386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,0,0.01181866725285848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,fp8,0,0.013642666240533194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,12,2,128,1,float16,float16,0,0.012330666184425354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,12,4,128,1,float16,float16,0,0.012586666891972223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,12,1,128,1,float16,float16,0,0.008789333204428354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,12,12,128,1,float16,float16,0,0.007925333455204964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,0,0.011498666057984034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,fp8,0,0.013503999759753546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,0,0.011258666714032492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,12,2,128,1,float16,float16,0,0.012170666207869848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,0,0.011114666859308878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,12,4,128,1,float16,float16,0,0.012245333443085352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,12,12,128,1,float16,float16,0,0.007781333600481351
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,0,0.011306667079528173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,12,1,128,1,float16,float16,0,0.008682666967312494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,0,0.010858666151762009
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,12,2,128,1,float16,float16,0,0.011994666109482447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,fp8,0,0.013141332815090815
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,0,0.011061333119869232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,12,4,128,1,float16,float16,0,0.012213333199421564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,12,12,128,1,float16,float16,0,0.007770666852593422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,12,1,128,1,float16,float16,0,0.008623999853928884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,0,0.010784000158309937
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,12,2,128,1,float16,float16,0,0.011957333733638128
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,12,4,128,1,float16,float16,0,0.012096000214417776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,12,12,128,1,float16,float16,0,0.007696000238259633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,12,1,128,1,float16,float16,0,0.0085333331177632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,0,0.010559999694426855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,12,2,128,1,float16,float16,0,0.0086666668454806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,12,4,128,1,float16,float16,0,0.008693333094318708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,fp8,0,0.011407999942700068
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,fp8,0,2.7017014821370444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,fp8,0,7.120581309000651
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,fp8,0,1.4586346944173176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,8,8,128,1,float16,float16,0,12.879530588785807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,fp8,0,3.8581387201944985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,8,1,128,1,float16,float16,0,13.438954671223959
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,8,2,128,1,float16,float16,0,13.084991455078125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,8,2,128,1,float16,float16,0,26.67065684000651
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,8,1,128,1,float16,float16,0,27.0872319539388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,8,4,128,1,float16,float16,0,27.333585103352863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,fp8,0,29.093551635742188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,8,4,128,1,float16,float16,0,13.482810974121094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,28.612960815429688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,fp8,0,0.8070666790008545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,8,8,128,1,float16,float16,0,6.573488235473633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,8,1,128,1,float16,float16,0,6.175258636474609
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,fp8,0,1.951807975769043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,8,8,128,1,float16,float16,0,13.653818766276041
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,fp8,0,14.908629099527994
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,8,2,128,1,float16,float16,0,6.064474741617839
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,8,4,128,1,float16,float16,0,6.420325597127278
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,8,8,128,1,float16,float16,0,5.488976160685222
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,14.223253885904947
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,fp8,0,0.4010506470998128
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,8,8,128,1,float16,float16,0,3.010869344075521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,fp8,0,7.080330530802409
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,8,1,128,1,float16,float16,0,2.9876960118611655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,fp8,0,1.1950720151265461
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,8,2,128,1,float16,float16,0,3.0055414835611978
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,6.923903783162435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,8,4,128,1,float16,float16,0,2.910773277282715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,8,8,128,1,float16,float16,0,2.974325180053711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,fp8,0,3.710325241088867
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,fp8,0,1.6771999994913738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,13.663125356038412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,6.758656183878581
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,fp8,0,4.4412533442179365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,28.446805318196613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,fp8,0,0.9985653559366862
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,8,8,128,1,float16,float16,0,7.703439712524414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,8,1,128,1,float16,float16,0,7.224997202555339
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,8,1,128,1,float16,float16,0,15.294554392496744
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,8,2,128,1,float16,float16,0,15.366666158040365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,fp8,0,2.383002599080404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,8,4,128,1,float16,float16,0,15.324666341145834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,8,2,128,1,float16,float16,0,7.205263773600261
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,fp8,0,16.591028849283855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,8,8,128,1,float16,float16,0,3.474389394124349
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,8,4,128,1,float16,float16,0,7.635274887084961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,16.096922556559246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,fp8,0,0.49318933486938477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,fp8,0,8.321952184041342
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,8,8,128,1,float16,float16,0,7.695994695027669
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,8,1,128,1,float16,float16,0,3.4054508209228516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,fp8,0,1.2404212951660156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,8,2,128,1,float16,float16,0,3.2837066650390625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,8,4,128,1,float16,float16,0,3.5842720667521157
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,8.07920010884603
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,8,8,128,1,float16,float16,0,1.9502132733662922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,fp8,0,4.011568069458008
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,8,8,128,1,float16,float16,0,3.7094879150390625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,fp8,0,0.2707786758740743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,8,1,128,1,float16,float16,0,1.673722743988037
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,fp8,0,0.9144319693247477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,8,2,128,1,float16,float16,0,1.7029013633728027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,8,4,128,1,float16,float16,0,1.6221973101298015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,16.255903879801433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,4.311034520467122
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,8,8,128,1,float16,float16,0,1.6859253247578938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,8.215407689412435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,fp8,0,2.354618708292643
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,fp8,0,1.3265066941579182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,3.773434638977051
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,fp8,0,3.070512135823568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,fp8,0,0.6786826451619467
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,8,8,128,1,float16,float16,0,5.462954839070638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,8,1,128,1,float16,float16,0,4.883781433105469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,fp8,0,1.816373348236084
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,8,1,128,1,float16,float16,0,11.65988286336263
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,8,2,128,1,float16,float16,0,4.653024037679036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,8,2,128,1,float16,float16,0,11.499696095784506
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,8,4,128,1,float16,float16,0,11.170112609863281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,12.015210469563803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,8,8,128,1,float16,float16,0,2.748501459757487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,8,4,128,1,float16,float16,0,5.209568023681641
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,fp8,0,5.6823469797770185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,fp8,0,0.36859198411305744
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,8,8,128,1,float16,float16,0,5.671333312988281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,fp8,0,11.470570882161459
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,8,1,128,1,float16,float16,0,2.3186987241109214
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,fp8,0,0.9858026504516602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,5.588490804036458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,8,2,128,1,float16,float16,0,2.4691999753316245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,8,4,128,1,float16,float16,0,2.3087946573893228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,8,8,128,1,float16,float16,0,1.2574400107065837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,fp8,0,0.19803732633590698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,8,1,128,1,float16,float16,0,1.1803946495056152
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,fp8,0,3.2484054565429688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,8,2,128,1,float16,float16,0,1.1230613390604656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,fp8,0,0.5876213312149048
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,8,8,128,1,float16,float16,0,2.3570987383524575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,2.7997919718424478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,8,4,128,1,float16,float16,0,1.133962631225586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,8,8,128,1,float16,float16,0,1.192090670267741
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,11.258800506591797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,fp8,0,1.6024586359659831
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,5.317562739054362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,2.558677355448405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,fp8,0,1.8554026285807292
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,fp8,0,4.167290687561035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,fp8,0,0.9244799613952637
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,8,8,128,1,float16,float16,0,7.097946802775065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,8,1,128,1,float16,float16,0,6.26643180847168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,fp8,0,2.2937493324279785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,8,1,128,1,float16,float16,0,14.1082394917806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,8,2,128,1,float16,float16,0,6.455504099527995
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,8,2,128,1,float16,float16,0,14.726272583007812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,8,4,128,1,float16,float16,0,14.523226420084635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,8,4,128,1,float16,float16,0,7.088053385416667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,fp8,0,16.123483022054035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,15.001941680908203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,fp8,0,0.49028801918029785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,8,8,128,1,float16,float16,0,3.2005866368611655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,fp8,0,7.486997604370117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,fp8,0,1.2871519724527996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,8,1,128,1,float16,float16,0,3.122426668802897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,8,8,128,1,float16,float16,0,7.174010594685872
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,8,2,128,1,float16,float16,0,3.062719980875651
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,8,4,128,1,float16,float16,0,3.254255930582682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,7.23957888285319
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,8,8,128,1,float16,float16,0,1.7226293881734211
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,8,8,128,1,float16,float16,0,3.0827201207478843
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,fp8,0,0.2688373327255249
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,fp8,0,3.663818677266439
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,8,1,128,1,float16,float16,0,1.528271993001302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,3.4345760345458984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,fp8,0,0.6721440156300863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,8,2,128,1,float16,float16,0,1.4086292584737141
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,8,4,128,1,float16,float16,0,1.5326026280721028
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,8,8,128,1,float16,float16,0,0.8296319643656412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,fp8,0,1.9530879656473796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,8,1,128,1,float16,float16,0,0.7797919909159342
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,fp8,0,0.1425386667251587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,8,8,128,1,float16,float16,0,1.6814346313476562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,7.275077184041341
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,8,2,128,1,float16,float16,0,0.7822986443837484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,fp8,0,0.4347039858500163
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,1.7571679751078289
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,8,4,128,1,float16,float16,0,0.759872039159139
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,3.4565280278523765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,fp8,0,1.1452319622039795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,15.198453267415365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,8,8,128,1,float16,float16,0,0.9288907051086426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,fp8,0,1.2454880078633626
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,1.7502986590067546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,fp8,0,2.6309653917948403
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,8,8,128,1,float16,float16,0,3.6558186213175454
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,8,1,128,1,float16,float16,0,7.762186686197917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,fp8,0,0.612058679262797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,8,1,128,1,float16,float16,0,3.078794797261556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,8,2,128,1,float16,float16,0,7.9500376383463545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,8,4,128,1,float16,float16,0,8.171333312988281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,fp8,0,1.297983964284261
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,fp8,0,9.052170435587565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,8.307429631551107
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,8,2,128,1,float16,float16,0,3.64244810740153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,8,8,128,1,float16,float16,0,1.9385439554850261
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,8,4,128,1,float16,float16,0,3.8344799677530923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,fp8,0,4.376325289408366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,8,8,128,1,float16,float16,0,4.043855985005696
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,fp8,0,0.26614399751027423
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,8,1,128,1,float16,float16,0,1.8534132639567058
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,fp8,0,0.7357760270436605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,8,2,128,1,float16,float16,0,1.9608480135599773
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,4.36135991414388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,8,4,128,1,float16,float16,0,1.8812479972839355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,8,8,128,1,float16,float16,0,1.850671927134196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,8,8,128,1,float16,float16,0,0.9574933052062988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,fp8,0,2.0359412829081216
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,8,1,128,1,float16,float16,0,0.9351200262705485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,fp8,0,0.16656532883644104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,fp8,0,0.4043360153834025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,8,4,128,1,float16,float16,0,0.8661119937896729
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,8,2,128,1,float16,float16,0,0.8768853346506754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,2.06660795211792
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,8.346970876057943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,8,8,128,1,float16,float16,0,0.9720426400502523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,fp8,0,1.2354933420817058
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,8,8,128,1,float16,float16,0,0.47250668207804364
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,fp8,0,0.13124799728393555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,8,2,128,1,float16,float16,0,0.46244800090789795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,fp8,0,0.2656373381614685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,8,1,128,1,float16,float16,0,0.48051198323567706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,8,4,128,1,float16,float16,0,0.46635735034942627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,1.9259413083394368
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,1.071994702021281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,fp8,0,0.7729120254516602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,8,8,128,1,float16,float16,0,0.46376534303029376
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,3.8931681315104165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,1.0368106365203857
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,fp8,0,1.4791146914164226
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,fp8,0,2.8399839401245117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,8,8,128,1,float16,float16,0,3.6353012720743814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,fp8,0,0.7009600003560384
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,8,1,128,1,float16,float16,0,2.943962732950846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,8,2,128,1,float16,float16,0,7.462565104166667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,8,4,128,1,float16,float16,0,7.264106750488281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,fp8,0,1.4122880299886067
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,8,1,128,1,float16,float16,0,6.687072118123372
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,fp8,0,8.510629018147787
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,7.852437337239583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,8,2,128,1,float16,float16,0,3.0255254109700522
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,8,4,128,1,float16,float16,0,3.3178720474243164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,8,1,128,1,float16,float16,0,1.4978826840718586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,fp8,0,0.36289068063100177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,fp8,0,4.165114720662435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,8,8,128,1,float16,float16,0,3.9003146489461265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,8,8,128,1,float16,float16,0,1.8594293594360352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,fp8,0,0.710208018620809
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,8,2,128,1,float16,float16,0,1.6997920672098796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,3.6479145685831704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,8,4,128,1,float16,float16,0,1.61190398534139
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,8,8,128,1,float16,float16,0,0.9055573145548502
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,8,1,128,1,float16,float16,0,0.8719306786855062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,8,8,128,1,float16,float16,0,1.9403573671976726
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,fp8,0,1.9872585932413738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,fp8,0,0.13237866759300232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,8,2,128,1,float16,float16,0,0.7620586554209391
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,1.8369654019673665
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,fp8,0,0.37653334935506183
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,8,4,128,1,float16,float16,0,0.8787039915720621
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,7.909008026123047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,3.5704854329427085
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,8,8,128,1,float16,float16,0,0.43801601727803546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,fp8,0,1.14684263865153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,8,8,128,1,float16,float16,0,0.9000426928202311
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,8,1,128,1,float16,float16,0,0.4175306558609009
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,fp8,0,0.09875733653704326
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,1.0265386899312336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,fp8,0,0.2189226746559143
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,8,4,128,1,float16,float16,0,0.4384853442509969
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,1.7608693440755208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,8,2,128,1,float16,float16,0,0.49353599548339844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,8,8,128,1,float16,float16,0,0.43148799737294513
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,8,8,128,1,float16,float16,0,0.26800533135732013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,fp8,0,0.6238186756769816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,8,1,128,1,float16,float16,0,0.24770132700602213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,fp8,0,0.07712000111738841
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,8,2,128,1,float16,float16,0,0.2863840063412984
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.5816106796264648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,fp8,0,0.13310399651527405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,8,4,128,1,float16,float16,0,0.2555413246154785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,8,8,128,1,float16,float16,0,0.2516640027364095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.9875573317209879
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,fp8,0,0.40837331612904865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.7128907044728597
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,fp8,0,1.0046933492024739
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,fp8,0,1.8509492874145508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,8,8,128,1,float16,float16,0,2.266335964202881
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,8,2,128,1,float16,float16,0,3.5020373662312827
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,8,1,128,1,float16,float16,0,3.6071786880493164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,8,4,128,1,float16,float16,0,3.9172051747639975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,8,1,128,1,float16,float16,0,1.6316746075948079
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,fp8,0,4.6433760325113935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,fp8,0,0.49538667996724445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,8,2,128,1,float16,float16,0,1.9450400670369465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,fp8,0,0.8717813491821289
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,4.217882792154948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,8,4,128,1,float16,float16,0,2.022245407104492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,8,8,128,1,float16,float16,0,1.1926453113555908
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,fp8,0,2.308677355448405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,8,8,128,1,float16,float16,0,2.222501277923584
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,fp8,0,0.17800533771514893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,8,1,128,1,float16,float16,0,0.8739039897918701
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,2.0507359504699707
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,fp8,0,0.4158720175425212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,8,2,128,1,float16,float16,0,0.9275573094685873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,8,4,128,1,float16,float16,0,1.1065173149108887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,8,8,128,1,float16,float16,0,0.5696693261464437
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,8,8,128,1,float16,float16,0,1.119439999262492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,4.3558454513549805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,8,1,128,1,float16,float16,0,0.47470935185750324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,fp8,0,1.1524159908294678
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,fp8,0,0.10500267148017883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,1.0566240151723225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,8,2,128,1,float16,float16,0,0.45051733652750653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,2.031263987223307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,fp8,0,0.22499734163284302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,8,4,128,1,float16,float16,0,0.535594662030538
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,8,8,128,1,float16,float16,0,0.5782080094019572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,fp8,0,0.6136533419291178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,8,8,128,1,float16,float16,0,0.26898666222890216
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,fp8,0,0.06547733147939046
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,1.0555360317230225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,fp8,0,0.15954666336377463
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.5985759894053141
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,8,2,128,1,float16,float16,0,0.26652799050013226
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,8,4,128,1,float16,float16,0,0.26712000370025635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,8,1,128,1,float16,float16,0,0.2558666666348775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,8,8,128,1,float16,float16,0,0.2726293404897054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,fp8,0,0.39421868324279785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,8,8,128,1,float16,float16,0,0.1599999964237213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,fp8,0,0.055344000458717346
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,8,1,128,1,float16,float16,0,0.15945067008336386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,fp8,0,0.10781332850456238
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,8,4,128,1,float16,float16,0,0.15848533312479654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.3470240036646525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.5751519997914633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,8,2,128,1,float16,float16,0,0.15381333231925964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,fp8,0,0.29289066791534424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,8,8,128,1,float16,float16,0,0.1607360045115153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.3442666530609131
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,8,8,128,1,float16,float16,0,2.4616266886393228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,fp8,0,1.2587626775105794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,fp8,0,4.728261311848958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,8,4,128,1,float16,float16,0,3.8900906244913735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,fp8,0,2.0666240056355796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,8,1,128,1,float16,float16,0,3.42305596669515
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,fp8,0,0.593450665473938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,4.244778633117676
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,8,1,128,1,float16,float16,0,1.5792373021443684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,8,2,128,1,float16,float16,0,3.7170772552490234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,fp8,0,0.9966933727264404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,8,2,128,1,float16,float16,0,1.6746719678243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,8,8,128,1,float16,float16,0,2.4595252672831216
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,8,8,128,1,float16,float16,0,1.1990933418273926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,8,4,128,1,float16,float16,0,2.0708319346110025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,fp8,0,0.25676266352335614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,fp8,0,2.3203840255737305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,2.0687840779622397
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,4.2351681391398115
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,8,4,128,1,float16,float16,0,1.0514933268229167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,8,2,128,1,float16,float16,0,0.9168000221252441
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,fp8,0,0.47621333599090576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,8,1,128,1,float16,float16,0,0.8157813549041748
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,fp8,0,1.1695679823557537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,8,8,128,1,float16,float16,0,1.1789066791534424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,8,1,128,1,float16,float16,0,0.41467734177907306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,8,8,128,1,float16,float16,0,0.6553226709365845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,8,2,128,1,float16,float16,0,0.44338667392730713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,fp8,0,0.19112533330917358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,8,4,128,1,float16,float16,0,0.5081813335418701
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,fp8,0,0.07834666470686595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,1.0496266682942708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,2.1168160438537598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,fp8,0,0.050053333242734276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,8,8,128,1,float16,float16,0,0.6004960139592489
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,fp8,0,0.6445333162943522
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,8,8,128,1,float16,float16,0,0.2580053408940633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,8,1,128,1,float16,float16,0,0.23121066888173422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.5083786646525065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,8,2,128,1,float16,float16,0,0.25172267357508343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,8,4,128,1,float16,float16,0,0.25128533442815143
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,1.0532639821370442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,8,8,128,1,float16,float16,0,0.2523840069770813
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,fp8,0,0.32979732751846313
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,fp8,0,0.11979200442632039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,8,8,128,1,float16,float16,0,0.14999999602635702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,fp8,0,0.042303999265034996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.5027253230412801
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,8,2,128,1,float16,float16,0,0.1338879962762197
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,8,1,128,1,float16,float16,0,0.1307253340880076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.31997867425282794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,8,4,128,1,float16,float16,0,0.1500746707121531
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,fp8,0,0.07597866654396057
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,8,8,128,1,float16,float16,0,0.1437333325544993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,fp8,0,0.19248000780741373
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,8,1,128,1,float16,float16,0,0.09176533420880635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.15685333808263144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.31301333506902057
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,8,2,128,1,float16,float16,0,0.09006399909655254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,8,8,128,1,float16,float16,0,0.09515733520189922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,fp8,0,0.03937066594759623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,fp8,0,0.06762666503588359
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,8,4,128,1,float16,float16,0,0.09733333190282185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,8,8,128,1,float16,float16,0,0.09464533130327861
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,fp8,0,0.12896000345547995
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.15653333067893982
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,8,8,128,1,float16,float16,0,1.5686987241109211
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,fp8,0,0.8905973434448242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,8,4,128,1,float16,float16,0,2.5578346252441406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,fp8,0,3.052351951599121
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,8,1,128,1,float16,float16,0,1.9810400009155273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,fp8,0,1.404080073038737
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,8,1,128,1,float16,float16,0,0.9430027008056641
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,8,2,128,1,float16,float16,0,2.122981389363607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,fp8,0,0.4173440138498942
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,fp8,0,0.642250657081604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,2.5747200647989907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,8,2,128,1,float16,float16,0,1.0953280131022136
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,fp8,0,1.5049227078755696
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,1.2228960196177165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,8,4,128,1,float16,float16,0,1.2613226572672527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,8,1,128,1,float16,float16,0,0.5008746782938639
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,fp8,0,0.10475200414657593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,2.6180319786071777
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,8,8,128,1,float16,float16,0,0.7438027064005533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,8,8,128,1,float16,float16,0,1.5491520563761394
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,fp8,0,0.29893867174784344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,8,2,128,1,float16,float16,0,0.5278453429539999
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,8,4,128,1,float16,float16,0,0.6442026694615682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,8,8,128,1,float16,float16,0,0.7501599788665771
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,fp8,0,0.7157013416290283
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,8,8,128,1,float16,float16,0,0.3795413176218669
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,fp8,0,0.06273599962393443
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,1.2769920031229656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,8,1,128,1,float16,float16,0,0.25128533442815143
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,fp8,0,0.13041067123413086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,8,2,128,1,float16,float16,0,0.2659200032552083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,8,4,128,1,float16,float16,0,0.31065599123636883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,8,8,128,1,float16,float16,0,0.37881068388621014
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.6335733334223429
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,8,8,128,1,float16,float16,0,0.16521599888801575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,8,1,128,1,float16,float16,0,0.144378662109375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,8,4,128,1,float16,float16,0,0.16646400094032288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.6533973217010498
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,fp8,0,0.09006399909655254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,fp8,0,0.0395413339138031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,8,2,128,1,float16,float16,0,0.1518346667289734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.32150934139887494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,fp8,0,0.36314666271209717
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,8,8,128,1,float16,float16,0,0.16590933005015054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,fp8,0,0.21173866589864096
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,8,8,128,1,float16,float16,0,0.09688533345858256
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,8,1,128,1,float16,float16,0,0.08872532844543457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,fp8,0,0.034330666065216064
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.3283466696739197
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,fp8,0,0.05807466804981232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.20844799280166626
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,8,4,128,1,float16,float16,0,0.09719467163085938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,8,8,128,1,float16,float16,0,0.09676800171534221
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,8,2,128,1,float16,float16,0,0.09126399954160054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,fp8,0,0.1551413337389628
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,8,8,128,1,float16,float16,0,0.05526933570702871
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,fp8,0,0.0313226655125618
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,8,2,128,1,float16,float16,0,0.050474668542544045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,8,4,128,1,float16,float16,0,0.054986665646235146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,8,1,128,1,float16,float16,0,0.04971733192602793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.1509226659933726
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,fp8,0,0.053727999329566956
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.209114670753479
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,fp8,0,0.09824533263842265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,8,8,128,1,float16,float16,0,0.05611200133959452
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.1485973298549652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,8,8,128,1,float16,float16,0,1.66756804784139
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,2.755370775858561
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,8,1,128,1,float16,float16,0,1.9015146891276042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,fp8,0,1.1859359741210938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,8,1,128,1,float16,float16,0,0.9641760190327963
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,8,2,128,1,float16,float16,0,2.1079626083374023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,fp8,0,1.6622400283813477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,fp8,0,0.5484266678492228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,8,2,128,1,float16,float16,0,1.052613337834676
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,fp8,0,0.8005759716033936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,8,4,128,1,float16,float16,0,1.3173973560333252
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,8,4,128,1,float16,float16,0,2.5789546966552734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,fp8,0,3.2364320755004883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,fp8,0,1.641541322072347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,8,8,128,1,float16,float16,0,0.8382666905721029
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,8,8,128,1,float16,float16,0,1.6676692962646484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,2.7497758865356445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,fp8,0,0.3467093308766683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,8,4,128,1,float16,float16,0,0.6833226680755615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,8,8,128,1,float16,float16,0,0.8318453629811605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,8,2,128,1,float16,float16,0,0.5558613141377767
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,1.3517600695292156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,fp8,0,0.7507200241088867
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,8,8,128,1,float16,float16,0,0.4128906726837158
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,fp8,0,0.21044800678888956
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,8,1,128,1,float16,float16,0,0.49571200211842853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,8,1,128,1,float16,float16,0,0.2407840092976888
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,1.3447413444519043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,8,2,128,1,float16,float16,0,0.2775839964548747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,fp8,0,0.1308746635913849
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,8,4,128,1,float16,float16,0,0.33854401111602783
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,8,8,128,1,float16,float16,0,0.16571733355522156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,8,8,128,1,float16,float16,0,0.42555733521779376
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.6636639833450317
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.3004213372866313
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.6579733292261759
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,fp8,0,0.058464000622431435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,fp8,0,0.035631999373435974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,8,2,128,1,float16,float16,0,0.14194666345914206
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,fp8,0,0.07354133327802022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,8,8,128,1,float16,float16,0,0.1711039940516154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,fp8,0,0.1994719902674357
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,8,4,128,1,float16,float16,0,0.15954132874806723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,8,8,128,1,float16,float16,0,0.09231999516487122
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.2998080054918925
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,8,1,128,1,float16,float16,0,0.1346506675084432
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,fp8,0,0.4081386725107829
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.18886399269104004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,8,1,128,1,float16,float16,0,0.08065600196520488
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,8,2,128,1,float16,float16,0,0.08489066362380981
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,fp8,0,0.04701333244641622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,8,4,128,1,float16,float16,0,0.09300266702969869
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,fp8,0,0.029685333371162415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,fp8,0,0.12286399801572163
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.18949333826700845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,8,1,128,1,float16,float16,0,0.044480000933011375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,fp8,0,0.025450666745503742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,8,2,128,1,float16,float16,0,0.04665066798528036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,8,8,128,1,float16,float16,0,0.050848002235094704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.11408000191052754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,fp8,0,0.041322665909926094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,8,4,128,1,float16,float16,0,0.05012266834576925
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,fp8,0,0.0749120016892751
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,8,8,128,1,float16,float16,0,0.05051200091838837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,8,8,128,1,float16,float16,0,0.09187199672063191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,fp8,0,0.03881066789229711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.07533866663773854
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,8,2,128,1,float16,float16,0,0.02784000088771184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,8,8,128,1,float16,float16,0,0.03009066730737686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,8,1,128,1,float16,float16,0,0.026698666314284008
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,fp8,0,0.053685332338015236
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,8,4,128,1,float16,float16,0,0.030133334298928578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,fp8,0,0.057029331723848976
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.11582400401433308
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.07507733503977458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,8,8,128,1,float16,float16,0,0.03010133405526479
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,fp8,0,1.1603946685791016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,8,8,128,1,float16,float16,0,1.6061546007792156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,8,1,128,1,float16,float16,0,1.694159984588623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,8,2,128,1,float16,float16,0,1.880725383758545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,fp8,0,1.5778719584147136
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,2.03492800394694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,8,4,128,1,float16,float16,0,2.380309263865153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,fp8,0,2.471754709879557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,8,1,128,1,float16,float16,0,0.6151253382364908
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,8,2,128,1,float16,float16,0,0.7602346738179525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,8,4,128,1,float16,float16,0,1.0716319878896077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,8,8,128,1,float16,float16,0,0.8009226322174072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.9514400164286295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,fp8,0,0.7277226448059082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,fp8,0,1.223680019378662
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,fp8,0,0.5308320124944051
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,8,8,128,1,float16,float16,0,1.619333267211914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,2.0489706993103027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,fp8,0,0.1992959976196289
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,8,1,128,1,float16,float16,0,0.31850665807724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,fp8,0,0.5493173201878866
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,fp8,0,0.2892906665802002
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,8,2,128,1,float16,float16,0,0.38392531871795654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,8,8,128,1,float16,float16,0,0.3549333413441976
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,8,4,128,1,float16,float16,0,0.529365340868632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.45154666900634766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,8,1,128,1,float16,float16,0,0.15263467033704123
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,8,2,128,1,float16,float16,0,0.18609599272410074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,fp8,0,0.04766400158405304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,fp8,0,0.09973333279291789
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,8,4,128,1,float16,float16,0,0.25206400950749713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,fp8,0,0.29522132873535156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,8,8,128,1,float16,float16,0,0.798373301823934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,8,8,128,1,float16,float16,0,0.1241919994354248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.948479970296224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.19921600818634033
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,8,8,128,1,float16,float16,0,0.3649493455886841
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.45495466391245526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,fp8,0,0.029322666426499683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,8,1,128,1,float16,float16,0,0.08258133133252461
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,8,4,128,1,float16,float16,0,0.10589333375295003
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,8,8,128,1,float16,float16,0,0.059061333537101746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.1213759978612264
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.19732266664505005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,8,8,128,1,float16,float16,0,0.12186666329701741
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,fp8,0,0.1378933290640513
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,8,1,128,1,float16,float16,0,0.04577599962552389
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,8,2,128,1,float16,float16,0,0.09141866366068523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,fp8,0,0.022810667753219604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,fp8,0,0.07310933371384938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,8,4,128,1,float16,float16,0,0.05835733314355215
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,8,2,128,1,float16,float16,0,0.04972266654173533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,8,8,128,1,float16,float16,0,0.05881600081920624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,fp8,0,0.033957332372665405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,fp8,0,0.05070933202902476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,8,8,128,1,float16,float16,0,0.03427733232577642
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,8,1,128,1,float16,float16,0,0.02812800059715907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.06490133206049602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,8,2,128,1,float16,float16,0,0.03002133220434189
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,fp8,0,0.019189332922299702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,8,4,128,1,float16,float16,0,0.03416533271471659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,fp8,0,0.027903998891512554
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,8,8,128,1,float16,float16,0,0.034330666065216064
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.12392533818880717
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,fp8,0,0.047839999198913574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,8,8,128,1,float16,float16,0,0.020495999604463577
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,8,1,128,1,float16,float16,0,0.017498667041460674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.06411199768384297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.05373866856098175
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,fp8,0,0.040735999743143715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,8,4,128,1,float16,float16,0,0.0204373337328434
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,8,8,128,1,float16,float16,0,0.02053333322207133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,fp8,0,0.04417600234349569
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,fp8,0,0.03283733377854029
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.05353599786758423
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,8,8,128,1,float16,float16,0,0.0176959993938605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,8,1,128,1,float16,float16,0,0.016496000190575916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,8,2,128,1,float16,float16,0,0.0183146670460701
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.03235200047492981
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,fp8,0,0.025040000677108765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,8,2,128,1,float16,float16,0,0.016629333297411602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,fp8,0,0.02548266698916753
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,8,4,128,1,float16,float16,0,0.017690667261679966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,fp8,0,0.027471999327341717
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.032458665470282234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,8,8,128,1,float16,float16,0,0.017781333376963932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,8,1,128,1,float16,float16,0,0.49353599548339844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,8,8,128,1,float16,float16,0,0.8032159805297852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,fp8,0,0.7192746798197428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,fp8,0,0.5294506549835205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,8,2,128,1,float16,float16,0,0.7000319957733154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,8,1,128,1,float16,float16,0,0.2348533272743225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.7495840390523275
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,8,4,128,1,float16,float16,0,1.0458292961120605
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,fp8,0,1.1781013011932373
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,8,2,128,1,float16,float16,0,0.318725327650706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,fp8,0,0.27588800589243573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,fp8,0,0.4724213282267253
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,8,8,128,1,float16,float16,0,0.8699893156687418
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,8,4,128,1,float16,float16,0,0.48982401688893634
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.3521653413772583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.7559413115183512
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,fp8,0,0.1994453271230062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,8,1,128,1,float16,float16,0,0.1079253355662028
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,8,8,128,1,float16,float16,0,0.3624853293100993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,fp8,0,0.04247466723124186
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,8,4,128,1,float16,float16,0,0.20990399519602457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,fp8,0,0.07829866806666057
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,8,8,128,1,float16,float16,0,0.36167999108632404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,8,8,128,1,float16,float16,0,0.10014399886131287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.14802133043607077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,8,2,128,1,float16,float16,0,0.12863467137018839
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,fp8,0,0.026608000199000042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,fp8,0,0.2348639965057373
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,fp8,0,0.04135466615358988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.36343999703725177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,8,1,128,1,float16,float16,0,0.05872533222039541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,fp8,0,0.10414933164914449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,8,4,128,1,float16,float16,0,0.08152533570925395
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,8,8,128,1,float16,float16,0,0.04533866544564565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,8,8,128,1,float16,float16,0,0.10140267014503479
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,8,1,128,1,float16,float16,0,0.03323733309904734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.14647466937700906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,8,2,128,1,float16,float16,0,0.06645333270231883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,fp8,0,0.027322667340437572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,8,2,128,1,float16,float16,0,0.0372533326347669
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,fp8,0,0.01940800001223882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,8,4,128,1,float16,float16,0,0.04457066456476847
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,fp8,0,0.0524533341328303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,8,8,128,1,float16,float16,0,0.045328001181284584
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,8,8,128,1,float16,float16,0,0.026736001173655193
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.08787199854850769
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.08835732936859131
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.04108799993991852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,8,1,128,1,float16,float16,0,0.020773333807786305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,fp8,0,0.015962666521469753
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,8,2,128,1,float16,float16,0,0.02250133454799652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,fp8,0,0.02103466788927714
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,8,4,128,1,float16,float16,0,0.026341333985328674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,8,8,128,1,float16,float16,0,0.026608000199000042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,fp8,0,0.03437866767247518
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,8,1,128,1,float16,float16,0,0.012655999511480331
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,8,8,128,1,float16,float16,0,0.015530666957298914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.04133866727352142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.0429013321797053
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,8,2,128,1,float16,float16,0,0.013397333522637686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,8,4,128,1,float16,float16,0,0.015504000087579092
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,fp8,0,0.029274667302767437
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,8,8,128,1,float16,float16,0,0.015765332927306492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,fp8,0,0.03402666747570038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,fp8,0,0.03737066686153412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.04274666806062063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,8,8,128,1,float16,float16,0,0.012837332983811697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,fp8,0,0.022181332111358643
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,8,1,128,1,float16,float16,0,0.011594666788975397
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.027600000301996868
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,8,2,128,1,float16,float16,0,0.01191466674208641
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,8,4,128,1,float16,float16,0,0.012810666114091873
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,fp8,0,0.025098666548728943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,fp8,0,0.026816000541051228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,8,8,128,1,float16,float16,0,0.012986666212479273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,8,8,128,1,float16,float16,0,0.011605333536863327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,8,1,128,1,float16,float16,0,0.011301333705584208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,fp8,0,0.018181333939234417
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,8,2,128,1,float16,float16,0,0.011247999966144562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.021984001000722248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,8,4,128,1,float16,float16,0,0.011584000041087469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,fp8,0,0.021546666820844013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.02754666656255722
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,fp8,0,0.02162133405605952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,8,8,128,1,float16,float16,0,0.011503999431928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.02181866765022278
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,8,8,128,1,float16,float16,0,0.3676053285598755
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,8,1,128,1,float16,float16,0,0.2466826637585958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,fp8,0,0.19244267543156943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,8,2,128,1,float16,float16,0,0.31587199370066327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,fp8,0,0.29686399300893146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,8,4,128,1,float16,float16,0,0.4880693356196086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,fp8,0,0.4656533400217692
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.29757867256800336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,8,1,128,1,float16,float16,0,0.08566400408744812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,fp8,0,0.042405332128206887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,8,2,128,1,float16,float16,0,0.11923199892044067
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,8,8,128,1,float16,float16,0,0.08397866288820903
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,fp8,0,0.06783999999364217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.11717333396275838
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,8,4,128,1,float16,float16,0,0.19497599204381308
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.2956906755765279
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,8,1,128,1,float16,float16,0,0.04660800099372864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,fp8,0,0.21482133865356445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,8,2,128,1,float16,float16,0,0.054618666569391884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,fp8,0,0.03800000001986822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,8,8,128,1,float16,float16,0,0.36449599266052246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,8,4,128,1,float16,float16,0,0.06957333286603291
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,fp8,0,0.02672533442576726
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,8,8,128,1,float16,float16,0,0.08528000116348267
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.1167733371257782
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,8,8,128,1,float16,float16,0,0.039173332353432976
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,fp8,0,0.08151466647783916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.0618453323841095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,fp8,0,0.01937599976857503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,8,2,128,1,float16,float16,0,0.031040000418821972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,8,8,128,1,float16,float16,0,0.03902400036652883
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,fp8,0,0.045093332727750145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.06169066826502482
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,fp8,0,0.024122667809327442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,8,4,128,1,float16,float16,0,0.03851199895143509
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,8,8,128,1,float16,float16,0,0.022442666192849476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,8,1,128,1,float16,float16,0,0.027066667874654133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.03428266694148382
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,fp8,0,0.017797333498795826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,fp8,0,0.015840000162522
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,8,4,128,1,float16,float16,0,0.022133332987626392
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,8,8,128,1,float16,float16,0,0.022757334013779957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,fp8,0,0.02757333219051361
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,8,2,128,1,float16,float16,0,0.01823466643691063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,8,1,128,1,float16,float16,0,0.016757333030303318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,8,8,128,1,float16,float16,0,0.013386666774749756
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,8,2,128,1,float16,float16,0,0.01121066634853681
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,fp8,0,0.014085333794355392
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.03436266630887985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,8,1,128,1,float16,float16,0,0.0102613332370917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,fp8,0,0.021157334248224895
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,8,4,128,1,float16,float16,0,0.013167999684810638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.02197866638501485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,8,8,128,1,float16,float16,0,0.013317332913478216
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,8,8,128,1,float16,float16,0,0.010389333590865135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,fp8,0,0.013429333766301474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,8,1,128,1,float16,float16,0,0.009141333401203156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.022111999491850536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,8,2,128,1,float16,float16,0,0.009354666496316591
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,fp8,0,0.013477332890033722
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.018613333503405254
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,8,4,128,1,float16,float16,0,0.010399999717871347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,8,8,128,1,float16,float16,0,0.010415999839703241
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,fp8,0,0.017925333231687546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.016544000556071598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,8,2,128,1,float16,float16,0,0.008832000195980072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,8,1,128,1,float16,float16,0,0.008736000085870424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.018538666268189747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,8,8,128,1,float16,float16,0,0.009077333534757296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,8,4,128,1,float16,float16,0,0.009045333291093508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,8,8,128,1,float16,float16,0,0.009103999783595404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,fp8,0,0.01632000009218852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,8,8,128,1,float16,float16,0,0.008698666468262672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,fp8,0,0.012682666381200155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,8,1,128,1,float16,float16,0,0.008538666491707167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.01635733370979627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,8,2,128,1,float16,float16,0,0.008469333251317343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.01655999943614006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,8,4,128,1,float16,float16,0,0.008746666833758354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,8,8,128,1,float16,float16,0,0.008896000062425932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,fp8,0,0.016106666376193363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.01632000009218852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,8,8,128,1,float16,float16,0,0.08570133646329244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,8,1,128,1,float16,float16,0,0.08019199967384338
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,fp8,0,0.042437334855397545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,fp8,0,0.06723199784755707
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,8,2,128,1,float16,float16,0,0.09889066219329834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,8,4,128,1,float16,float16,0,0.19618666172027588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,fp8,0,0.20701332887013754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.09150399764378865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,8,1,128,1,float16,float16,0,0.04423466821511587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,8,2,128,1,float16,float16,0,0.05243200063705444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,fp8,0,0.02643733223279317
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,fp8,0,0.03782933453718821
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,8,4,128,1,float16,float16,0,0.06717866659164429
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,fp8,0,0.07379733522733052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,8,8,128,1,float16,float16,0,0.037231999138991036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,8,8,128,1,float16,float16,0,0.08250133196512859
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,8,1,128,1,float16,float16,0,0.02532266577084859
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.04929066697756449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,fp8,0,0.019365333020687103
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,8,2,128,1,float16,float16,0,0.02906133234500885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.09328533212343852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,fp8,0,0.023978665471076965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,8,4,128,1,float16,float16,0,0.036415999134381614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,8,8,128,1,float16,float16,0,0.021226666867733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,fp8,0,0.040192000567913055
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.027834666272004444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,8,8,128,1,float16,float16,0,0.03707200040419897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,8,1,128,1,float16,float16,0,0.015317333241303762
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,fp8,0,0.015850666910409927
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,8,4,128,1,float16,float16,0,0.02086399992307027
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,8,2,128,1,float16,float16,0,0.016869333883126576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,8,8,128,1,float16,float16,0,0.012784000486135483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,fp8,0,0.017727999637524288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.04916266600290934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,fp8,0,0.02517866591612498
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,8,8,128,1,float16,float16,0,0.021274665991465252
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.027802666028340656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,fp8,0,0.013829333086808523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.01833600054184596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,8,4,128,1,float16,float16,0,0.012682666381200155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,8,2,128,1,float16,float16,0,0.010570666442314783
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,8,1,128,1,float16,float16,0,0.009952000031868616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,8,8,128,1,float16,float16,0,0.01293333371480306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,fp8,0,0.018031999468803406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,8,8,128,1,float16,float16,0,0.010384000216921171
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,8,2,128,1,float16,float16,0,0.009205333267649015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.01817600056529045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,fp8,0,0.014896000425020853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.014783999572197596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,8,1,128,1,float16,float16,0,0.009050666665037474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,8,4,128,1,float16,float16,0,0.010234666367371878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,fp8,0,0.01339200014869372
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,8,8,128,1,float16,float16,0,0.010421333213647207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,fp8,0,0.014581333845853806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,8,1,128,1,float16,float16,0,0.008623999853928884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.014746667196353277
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,8,8,128,1,float16,float16,0,0.008826666822036108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,8,2,128,1,float16,float16,0,0.008698666468262672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,8,4,128,1,float16,float16,0,0.008826666822036108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,8,8,128,1,float16,float16,0,0.00892800030608972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,8,1,128,1,float16,float16,0,0.008512000242869059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,8,2,128,1,float16,float16,0,0.008394666636983553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,fp8,0,0.012453333785136541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.012752000242471695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,8,8,128,1,float16,float16,0,0.00847999999920527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,8,4,128,1,float16,float16,0,0.008527999743819237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,fp8,0,0.01257066677014033
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,8,8,128,1,float16,float16,0,0.008400000010927519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,8,1,128,1,float16,float16,0,0.008298666526873907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,fp8,0,0.012144000579913458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,8,8,128,1,float16,float16,0,0.008576000109314919
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,8,2,128,1,float16,float16,0,0.008378666515151659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,fp8,0,0.012458667159080505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.012298667182525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,8,4,128,1,float16,float16,0,0.008320000022649765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,8,8,128,1,float16,float16,0,0.008458666503429413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,fp8,0,0.012538666526476542
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.01249066616098086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,8,8,128,1,float16,float16,0,0.05064533154169718
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,8,1,128,1,float16,float16,0,0.06960000097751617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,fp8,0,0.03139200061559677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,fp8,0,0.05266666909058889
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,8,2,128,1,float16,float16,0,0.07771733403205872
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,8,4,128,1,float16,float16,0,0.09243733684221904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,fp8,0,0.1029866635799408
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.07724800209204356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,8,1,128,1,float16,float16,0,0.03900266687075297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,fp8,0,0.023584000766277313
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,8,2,128,1,float16,float16,0,0.042597333590189614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,fp8,0,0.029317334294319153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,8,4,128,1,float16,float16,0,0.05013333261013031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,fp8,0,0.05506666501363119
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,8,8,128,1,float16,float16,0,0.05070933202902476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,8,8,128,1,float16,float16,0,0.027893332143624622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,8,1,128,1,float16,float16,0,0.02204799900452296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,fp8,0,0.019706666469573975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,8,2,128,1,float16,float16,0,0.02365333338578542
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.04267199834187826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.07760000228881836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,8,4,128,1,float16,float16,0,0.02752000093460083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,8,8,128,1,float16,float16,0,0.02777066578467687
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,fp8,0,0.02995733420054118
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,8,8,128,1,float16,float16,0,0.01599466676513354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.023413332800070446
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,fp8,0,0.02181333303451538
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.04238933324813843
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,8,2,128,1,float16,float16,0,0.013994666437307993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,8,1,128,1,float16,float16,0,0.01322666679819425
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,8,4,128,1,float16,float16,0,0.01600533351302147
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,8,8,128,1,float16,float16,0,0.016085332880417507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,fp8,0,0.01868266612291336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,fp8,0,0.022154666483402252
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,fp8,0,0.01748266691962878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,8,8,128,1,float16,float16,0,0.01002133327225844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,8,2,128,1,float16,float16,0,0.00915733352303505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.023685333629449207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.018853332847356796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,fp8,0,0.017845333864291508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,8,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,fp8,0,0.016938666502634685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,8,4,128,1,float16,float16,0,0.010053333515922228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,8,8,128,1,float16,float16,0,0.010133333504199982
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,fp8,0,0.01863466699918111
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.018965333700180054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,8,8,128,1,float16,float16,0,0.008709333216150602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,fp8,0,0.0173333336909612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,8,1,128,1,float16,float16,0,0.008352000266313553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.016773333152135212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,8,2,128,1,float16,float16,0,0.00854399986565113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,fp8,0,0.01741333305835724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,8,4,128,1,float16,float16,0,0.008709333216150602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,8,8,128,1,float16,float16,0,0.008218666538596153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,8,1,128,1,float16,float16,0,0.008218666538596153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.016789333273967106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,fp8,0,0.016714667280515034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,fp8,0,0.017029333859682083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,8,8,128,1,float16,float16,0,0.008858666444818178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,8,2,128,1,float16,float16,0,0.008234666660428047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.01637866720557213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,8,4,128,1,float16,float16,0,0.008234666660428047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,fp8,0,0.01720000058412552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,8,8,128,1,float16,float16,0,0.008197333042820295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,8,8,128,1,float16,float16,0,0.008165333420038223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.01647466669480006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,fp8,0,0.016421332955360413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,8,2,128,1,float16,float16,0,0.008063999935984612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,8,1,128,1,float16,float16,0,0.007925333455204964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,8,4,128,1,float16,float16,0,0.00808533343176047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,fp8,0,0.016938666502634685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,8,8,128,1,float16,float16,0,0.008223999912540117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,8,8,128,1,float16,float16,0,0.008074666683872541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,fp8,0,0.01701333373785019
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,8,1,128,1,float16,float16,0,0.00795199970404307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.016943999876578648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,8,2,128,1,float16,float16,0,0.008026666939258575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,fp8,0,0.016458666572968166
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,fp8,0,0.01684800038735072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,8,4,128,1,float16,float16,0,0.00808533343176047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,fp8,0,0.016965333372354507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,8,8,128,1,float16,float16,0,0.008069333309928576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.016858667135238647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,8,8,128,1,float16,float16,0,0.041893333196640015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,8,1,128,1,float16,float16,0,0.06417066852251689
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,8,2,128,1,float16,float16,0,0.06763733426729839
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,fp8,0,0.02518933266401291
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,fp8,0,0.04242133100827535
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,8,4,128,1,float16,float16,0,0.07549333572387695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,fp8,0,0.07558933397134145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.0637600024541219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,8,1,128,1,float16,float16,0,0.036090667049090065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,fp8,0,0.020106667031844456
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,8,2,128,1,float16,float16,0,0.03756800045569738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,fp8,0,0.023520000278949738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,8,4,128,1,float16,float16,0,0.04160533348719279
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,fp8,0,0.04146666576464971
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,8,8,128,1,float16,float16,0,0.04182933270931244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,8,8,128,1,float16,float16,0,0.02312533309062322
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.03496533383925756
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,8,1,128,1,float16,float16,0,0.020288000504175823
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,fp8,0,0.017504000415404636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.06434133152167003
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,8,2,128,1,float16,float16,0,0.020997333029905956
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,8,4,128,1,float16,float16,0,0.02293866624434789
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,fp8,0,0.0230880007147789
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,8,8,128,1,float16,float16,0,0.023285334308942158
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,8,8,128,1,float16,float16,0,0.013647999614477158
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.01995733380317688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.03514666606982549
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,8,1,128,1,float16,float16,0,0.012416000167528788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,8,4,128,1,float16,float16,0,0.013584000368913015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,fp8,0,0.01693333312869072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,8,2,128,1,float16,float16,0,0.01249066616098086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,fp8,0,0.01691199963291486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,fp8,0,0.01850133389234543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,8,8,128,1,float16,float16,0,0.013674666484196981
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,8,8,128,1,float16,float16,0,0.008799999952316284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,8,2,128,1,float16,float16,0,0.0084906667470932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.016917333006858826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,fp8,0,0.017514667163292568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.02004266654451688
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,8,1,128,1,float16,float16,0,0.008447999755541483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,8,4,128,1,float16,float16,0,0.008752000207702318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,fp8,0,0.01657066618402799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,8,8,128,1,float16,float16,0,0.008245333408315977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,8,8,128,1,float16,float16,0,0.008874666566650072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,8,1,128,1,float16,float16,0,0.008176000167926153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,fp8,0,0.016634666671355564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.0164533331990242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.01691199963291486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,fp8,0,0.017018667111794155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,8,2,128,1,float16,float16,0,0.008101333553592363
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,8,4,128,1,float16,float16,0,0.008293333152929941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,fp8,0,0.01720000058412552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,fp8,0,0.016234666109085083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,8,8,128,1,float16,float16,0,0.008303999900817871
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,8,1,128,1,float16,float16,0,0.007946666950980822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,fp8,0,0.016783999900023144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,8,2,128,1,float16,float16,0,0.007962666451931
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.016117333124081295
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.01640533283352852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,8,8,128,1,float16,float16,0,0.008047999814152718
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,8,4,128,1,float16,float16,0,0.008037333066264788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,8,8,128,1,float16,float16,0,0.007967999825874964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,8,8,128,1,float16,float16,0,0.007967999825874964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,fp8,0,0.0169813334941864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.016074666132529575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.0169813334941864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,fp8,0,0.016794666647911072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,8,1,128,1,float16,float16,0,0.007925333455204964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,8,2,128,1,float16,float16,0,0.007840000092983246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,8,4,128,1,float16,float16,0,0.007978666573762894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,fp8,0,0.01682666689157486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,fp8,0,0.016869333883126576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,8,8,128,1,float16,float16,0,0.007989333321650824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,8,1,128,1,float16,float16,0,0.007765333478649457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,fp8,0,0.01617066686352094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,8,2,128,1,float16,float16,0,0.007797333101431529
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.016938666502634685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,8,8,128,1,float16,float16,0,0.008112000301480293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,8,4,128,1,float16,float16,0,0.007983999947706858
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,fp8,0,0.016399999459584553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,8,8,128,1,float16,float16,0,0.007903999959429106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,fp8,0,0.01685333376129468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.016832000265518825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,8,8,128,1,float16,float16,0,0.032458665470282234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.031930667658646904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,8,1,128,1,float16,float16,0,0.013274667163689932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,8,2,128,1,float16,float16,0,0.024613333245118458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,0,0.020714666694402695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,8,4,128,1,float16,float16,0,0.038719999293486275
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,0,0.014650666465361914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,0,0.032431999842325844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,8,1,128,1,float16,float16,0,0.009381333366036415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,0,0.012042666474978128
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,8,2,128,1,float16,float16,0,0.016672000288963318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,8,8,128,1,float16,float16,0,0.01865600049495697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,8,4,128,1,float16,float16,0,0.023845332364241283
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,0,0.014186666657527288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,8,8,128,1,float16,float16,0,0.03257599969704946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,0,0.020282667130231857
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.031712000568707786
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,8,1,128,1,float16,float16,0,0.00898133342464765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.019823999454577763
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,8,2,128,1,float16,float16,0,0.01293333371480306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,0,0.011546666423479715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,0,0.01181866725285848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,8,4,128,1,float16,float16,0,0.016735999534527462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,0,0.013717333475748697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,8,8,128,1,float16,float16,0,0.01863466699918111
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,8,8,128,1,float16,float16,0,0.01156266654531161
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,8,1,128,1,float16,float16,0,0.008821333448092142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.013546666751305262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,0,0.011354666203260422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.019839999576409657
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,8,2,128,1,float16,float16,0,0.01257066677014033
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,8,4,128,1,float16,float16,0,0.012800000607967377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,8,8,128,1,float16,float16,0,0.00808533343176047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,8,8,128,1,float16,float16,0,0.011541333049535751
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,0,0.011482667177915573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.013397333522637686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.013621332744757334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,8,1,128,1,float16,float16,0,0.00877333308259646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,8,2,128,1,float16,float16,0,0.012186666329701742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,0,0.011157333850860596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,8,4,128,1,float16,float16,0,0.012410666793584824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,8,8,128,1,float16,float16,0,0.008069333309928576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,0,0.01138666644692421
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,8,8,128,1,float16,float16,0,0.007781333600481351
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,8,1,128,1,float16,float16,0,0.008816000074148178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.013418667018413544
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,8,2,128,1,float16,float16,0,0.012298667182525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,8,4,128,1,float16,float16,0,0.01228800043463707
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,8,8,128,1,float16,float16,0,0.007717333113153775
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,8,8,128,1,float16,float16,0,0.007887999837597212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,0,0.011312000453472137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,8,1,128,1,float16,float16,0,0.008693333094318708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,0,0.011168000598748526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,8,2,128,1,float16,float16,0,0.012042666474978128
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,8,4,128,1,float16,float16,0,0.012138667205969492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,8,8,128,1,float16,float16,0,0.007653333246707916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,8,8,128,1,float16,float16,0,0.007663999994595845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,8,1,128,1,float16,float16,0,0.008458666503429413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.011168000598748526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,8,2,128,1,float16,float16,0,0.011936000237862269
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,8,4,128,1,float16,float16,0,0.012096000214417776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,8,8,128,1,float16,float16,0,0.007637333124876022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,8,8,128,1,float16,float16,0,0.007541333635648091
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,8,1,128,1,float16,float16,0,0.008394666636983553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,0,0.010842667271693548
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.011440000186363855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.01129066695769628
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,8,2,128,1,float16,float16,0,0.008506666868925095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,0,0.010725333044926325
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,8,4,128,1,float16,float16,0,0.008618666479984919
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,8,8,128,1,float16,float16,0,0.007658666620651881
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,0,0.010602666685978571
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.011120000233252844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,fp8,0,3.770352045694987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,4,1,128,1,float16,float16,0,5.992778778076172
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,4,4,128,1,float16,float16,0,6.272895812988281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,fp8,0,1.9483413696289062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,4,2,128,1,float16,float16,0,6.315167744954427
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,4,1,128,1,float16,float16,0,12.938426971435547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,4,2,128,1,float16,float16,0,13.1058349609375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,4,4,128,1,float16,float16,0,6.49509874979655
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,fp8,0,14.448501586914062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,14.634512583414713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,fp8,0,7.25276247660319
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,4,4,128,1,float16,float16,0,3.4040212631225586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,fp8,0,1.3408907254536946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,4,1,128,1,float16,float16,0,2.9794079462687173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,4,2,128,1,float16,float16,0,3.0235732396443686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,4,4,128,1,float16,float16,0,1.4992639223734539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,4,4,128,1,float16,float16,0,3.175845464070638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,fp8,0,3.912538528442383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,fp8,0,0.6707786719004313
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,7.064399719238281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,4,1,128,1,float16,float16,0,1.5043840408325195
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,4,2,128,1,float16,float16,0,1.6850934028625488
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,3.4528961181640625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,4,4,128,1,float16,float16,0,1.4620960553487141
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,fp8,0,2.167194684346517
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,14.207684834798178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,6.810373306274414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,fp8,0,2.371509393056234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,3.5207999547322593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,fp8,0,1.261898676554362
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,4,4,128,1,float16,float16,0,3.328245480855306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,4,1,128,1,float16,float16,0,7.277119954427083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,4,1,128,1,float16,float16,0,3.1269280115763345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,4,2,128,1,float16,float16,0,7.376549402872722
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,4,2,128,1,float16,float16,0,3.1269760131835938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,fp8,0,8.32758903503418
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,4,4,128,1,float16,float16,0,2.0606346130371094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,8.03929074605306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,4,4,128,1,float16,float16,0,3.5944639841715493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,fp8,0,3.8252747853597007
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,fp8,0,0.7539040247599283
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,4,1,128,1,float16,float16,0,1.7790826161702473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,4,2,128,1,float16,float16,0,1.7721707026163738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,4,4,128,1,float16,float16,0,1.7173120180765789
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,4,4,128,1,float16,float16,0,0.9099893569946289
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,3.989749272664388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,fp8,0,2.362607955932617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,fp8,0,0.4546613295873006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,7.856330871582031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,4,2,128,1,float16,float16,0,0.9113866488138834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,4,1,128,1,float16,float16,0,0.9303466478983561
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,2.1059254010518393
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,3.8513921101888022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,4,4,128,1,float16,float16,0,0.890671968460083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,fp8,0,1.4365386962890625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,fp8,0,1.6136587460835774
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,2.2426986694335938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,4,1,128,1,float16,float16,0,2.098181406656901
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,4,4,128,1,float16,float16,0,2.408682664235433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,fp8,0,0.8501706918080648
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,4,1,128,1,float16,float16,0,4.697312037150065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,4,2,128,1,float16,float16,0,4.68997319539388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,4,2,128,1,float16,float16,0,2.2491092681884766
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,5.487328211466472
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,4,4,128,1,float16,float16,0,2.498725255330404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,4,4,128,1,float16,float16,0,1.3940587043762207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,fp8,0,2.886154810587565
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,4,1,128,1,float16,float16,0,1.1472960313161213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,fp8,0,0.6539573272069296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,fp8,0,5.295712153116862
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,4,2,128,1,float16,float16,0,1.2764053344726562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,4,4,128,1,float16,float16,0,1.250816027323405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,3.0159838994344077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,fp8,0,1.6950666109720867
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,4,4,128,1,float16,float16,0,0.7382187048594157
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,5.090122540791829
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,fp8,0,0.3466613292694092
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,4,1,128,1,float16,float16,0,0.6686506271362305
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,4,2,128,1,float16,float16,0,0.6922293504079183
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,4,4,128,1,float16,float16,0,0.6873439947764078
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,2.675408045450846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,fp8,0,1.0750933488210042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,1.5651946067810059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,1.709653377532959
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,fp8,0,2.200965404510498
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,fp8,0,1.094213326772054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,4,4,128,1,float16,float16,0,3.1304267247517905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,4,1,128,1,float16,float16,0,2.826618512471517
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,4,2,128,1,float16,float16,0,2.884928067525228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,4,1,128,1,float16,float16,0,6.504629135131836
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,4,2,128,1,float16,float16,0,6.98908805847168
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,fp8,0,3.3953173955281577
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,fp8,0,7.458896001180013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,7.35911496480306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,4,4,128,1,float16,float16,0,3.4206771850585938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,4,4,128,1,float16,float16,0,1.6856320699055989
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,fp8,0,0.604533314704895
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,4,1,128,1,float16,float16,0,1.6124265988667805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,4,4,128,1,float16,float16,0,0.8428213596343994
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,4,2,128,1,float16,float16,0,1.8102399508158367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,4,4,128,1,float16,float16,0,1.7647786140441895
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,3.64625612894694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,fp8,0,2.042799949645996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,fp8,0,0.4625226656595866
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,4,1,128,1,float16,float16,0,0.7540533542633057
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,4,2,128,1,float16,float16,0,0.8369867006937662
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,2.001797358194987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,4,4,128,1,float16,float16,0,0.870789368947347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,fp8,0,1.1212693055470784
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,3.5118185679117837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,7.479674657185872
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,4,4,128,1,float16,float16,0,0.4674239953358968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,4,1,128,1,float16,float16,0,0.4567199945449829
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,fp8,0,0.264847993850708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,4,2,128,1,float16,float16,0,0.45338666439056396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,1.0353919665018718
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,4,4,128,1,float16,float16,0,0.44912532965342206
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,fp8,0,0.7796746889750162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,2.111039956410726
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,1.1373653411865234
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,fp8,0,1.3191306591033936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,fp8,0,0.6644853353500366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,4,4,128,1,float16,float16,0,1.9319945971171062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,4,1,128,1,float16,float16,0,1.6598985989888508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,4,1,128,1,float16,float16,0,3.1363840103149414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,4,2,128,1,float16,float16,0,1.6756213506062825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,4,2,128,1,float16,float16,0,3.4943040211995444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,3.800426801045736
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,fp8,0,3.9847679138183594
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,4,4,128,1,float16,float16,0,1.8797492980957031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,4,4,128,1,float16,float16,0,0.9683252970377604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,fp8,0,2.007823944091797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,fp8,0,0.4461119969685872
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,4,1,128,1,float16,float16,0,0.9185280005137125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,4,2,128,1,float16,float16,0,0.8808053334554037
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,2.2594985961914062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,fp8,0,1.0697920322418213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,4,4,128,1,float16,float16,0,0.512826681137085
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,4,4,128,1,float16,float16,0,0.9901493390401205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,3.9101759592692056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,4,1,128,1,float16,float16,0,0.4749973217646281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,fp8,0,0.24926400184631348
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,4,2,128,1,float16,float16,0,0.47014399369557697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,1.0897760391235352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,4,4,128,1,float16,float16,0,0.5188533465067545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,fp8,0,0.7011093298594157
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,2.030165354410807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,4,4,128,1,float16,float16,0,0.2858933409055074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,4,1,128,1,float16,float16,0,0.2744479974110921
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,fp8,0,0.2265333334604899
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.6472533146540324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,1.0945546627044678
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,4,4,128,1,float16,float16,0,0.2869759996732076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,4,2,128,1,float16,float16,0,0.283242662747701
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,fp8,0,0.5182133515675863
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.6541759967803955
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,4,4,128,1,float16,float16,0,1.9585493405659993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,fp8,0,1.4655359586079915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,4,1,128,1,float16,float16,0,1.5136960347493489
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,4,2,128,1,float16,float16,0,3.2691733042399087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,3.5364745457967124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,4,1,128,1,float16,float16,0,2.982272148132324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,fp8,0,3.9816907246907554
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,fp8,0,0.6892053286234537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,4,2,128,1,float16,float16,0,1.731221357981364
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,fp8,0,1.8956640561421711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,4,1,128,1,float16,float16,0,0.7580053011576334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,4,4,128,1,float16,float16,0,1.926645278930664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,4,4,128,1,float16,float16,0,1.050096035003662
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,fp8,0,0.3516586621602376
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,1.9076587359110515
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,4,2,128,1,float16,float16,0,0.8041706879933676
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,4,4,128,1,float16,float16,0,1.00491197903951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,fp8,0,1.0494986375172932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,4,4,128,1,float16,float16,0,0.4654453198115031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,4,1,128,1,float16,float16,0,0.41285332043965656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,3.624154726664225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,1.8818666140238445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.9820000330607096
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,fp8,0,0.2124639948209127
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,4,2,128,1,float16,float16,0,0.4230026801427205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,fp8,0,0.6032533248265585
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,4,4,128,1,float16,float16,0,0.4691520134607951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,fp8,0,0.13893866539001465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,4,1,128,1,float16,float16,0,0.24226667483647665
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,4,2,128,1,float16,float16,0,0.24769065777460733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.6204213301340739
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,4,4,128,1,float16,float16,0,0.26395734151204425
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,1.1794986724853516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,4,4,128,1,float16,float16,0,0.25727466742197674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,fp8,0,0.3678079843521118
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,4,4,128,1,float16,float16,0,0.16576533516248068
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,4,1,128,1,float16,float16,0,0.15742933750152588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.3401493231455485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.6088159879048666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,4,4,128,1,float16,float16,0,0.16403733690579733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,fp8,0,0.23979200919469199
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.34643733501434326
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,fp8,0,0.12210667133331299
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,4,2,128,1,float16,float16,0,0.17675199111302695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,4,4,128,1,float16,float16,0,1.214634656906128
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,fp8,0,0.3929866552352905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,fp8,0,0.894933303197225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,4,2,128,1,float16,float16,0,2.001322587331136
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,2.18833065032959
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,fp8,0,2.409605344136556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,4,1,128,1,float16,float16,0,1.8665653864542644
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,4,1,128,1,float16,float16,0,0.9499306678771973
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,4,2,128,1,float16,float16,0,0.9603040218353271
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,fp8,0,1.1626026630401611
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,4,4,128,1,float16,float16,0,0.6182933251063029
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,4,2,128,1,float16,float16,0,0.4806613524754842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,4,1,128,1,float16,float16,0,0.45791999499003094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,fp8,0,0.21223467588424683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,1.1611519654591878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,4,4,128,1,float16,float16,0,1.221882661183675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,4,4,128,1,float16,float16,0,0.6052159865697225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,fp8,0,0.5851413408915201
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,2.06003204981486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,4,1,128,1,float16,float16,0,0.25754666328430176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,4,4,128,1,float16,float16,0,0.28276266654332477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,4,2,128,1,float16,float16,0,0.26503467559814453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,fp8,0,0.1579093337059021
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,1.1661333243052165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,4,4,128,1,float16,float16,0,0.2860959966977437
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,4,4,128,1,float16,float16,0,0.17003200451533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,fp8,0,0.3863840103149414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.6185280084609985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.6165813207626343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.3442773421605428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,fp8,0,0.10394133130709331
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,4,2,128,1,float16,float16,0,0.16054399808247885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,4,1,128,1,float16,float16,0,0.15180266896883646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,4,4,128,1,float16,float16,0,0.16962667306264242
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,fp8,0,0.28859732548395794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,4,4,128,1,float16,float16,0,0.10925333698590596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,4,1,128,1,float16,float16,0,0.10406399766604106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,fp8,0,0.09365333120028178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.34354666868845624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.2908959984779358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,fp8,0,0.17709332704544067
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,4,2,128,1,float16,float16,0,0.10469333330790202
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,4,4,128,1,float16,float16,0,0.10956266522407532
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.2947733402252197
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,4,4,128,1,float16,float16,0,1.271946668624878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,2.125312010447184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,4,1,128,1,float16,float16,0,0.9024693171183268
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,fp8,0,1.0472479661305745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,4,1,128,1,float16,float16,0,1.8188907305399578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,fp8,0,0.4575893481572469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,4,2,128,1,float16,float16,0,2.0084586143493652
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,4,2,128,1,float16,float16,0,0.9761919975280762
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,fp8,0,1.1703200340270996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,4,4,128,1,float16,float16,0,0.6543573141098022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,fp8,0,2.4388853708902993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,4,4,128,1,float16,float16,0,1.2510240077972412
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,2.0204319953918457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,4,4,128,1,float16,float16,0,0.6415146589279175
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,4,2,128,1,float16,float16,0,0.5076160033543905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,fp8,0,0.18997333447138467
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,4,1,128,1,float16,float16,0,0.43025068442026776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,fp8,0,0.5775626500447592
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,fp8,0,0.11123200257619222
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,4,1,128,1,float16,float16,0,0.23689599831899008
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,4,4,128,1,float16,float16,0,0.28754132986068726
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,1.1034986972808838
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.5632426738739014
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,1.0861546993255615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,4,4,128,1,float16,float16,0,0.1607253352801005
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,4,4,128,1,float16,float16,0,0.2860479950904846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,fp8,0,0.31514134009679157
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.3291520078976949
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,4,1,128,1,float16,float16,0,0.13401599725087485
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.5723839998245239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,fp8,0,0.07637866834799449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,4,2,128,1,float16,float16,0,0.251306672890981
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,4,2,128,1,float16,float16,0,0.14044800400733948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,fp8,0,0.18763200441996256
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,4,4,128,1,float16,float16,0,0.1572480003039042
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.16742400328318277
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.3306293288866679
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,4,4,128,1,float16,float16,0,0.10089066624641418
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,4,2,128,1,float16,float16,0,0.09269332885742188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,4,1,128,1,float16,float16,0,0.08973333239555359
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,fp8,0,0.06816533207893372
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,4,4,128,1,float16,float16,0,0.10021332899729411
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,fp8,0,0.12668266892433167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,4,4,128,1,float16,float16,0,0.052789335449536644
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.16688533624013266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,4,1,128,1,float16,float16,0,0.04666133224964142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.12933866182963052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,4,4,128,1,float16,float16,0,0.0525439977645874
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,fp8,0,0.12084266543388367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,fp8,0,0.0662720004717509
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.12875200311342874
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,4,2,128,1,float16,float16,0,0.048991998036702476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,4,4,128,1,float16,float16,0,0.8068906466166178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,fp8,0,0.29797865947087604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,1.3253333568572998
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,4,1,128,1,float16,float16,0,0.5168853203455607
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,fp8,0,0.6547679901123047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,4,1,128,1,float16,float16,0,1.0963466962178547
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,4,4,128,1,float16,float16,0,0.8258506457010905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,4,2,128,1,float16,float16,0,0.6330666542053223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,4,4,128,1,float16,float16,0,0.4117013216018677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,fp8,0,0.7119839986165365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,1.3133546511332195
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,4,2,128,1,float16,float16,0,1.2213280200958252
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.7289013067881266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,fp8,0,1.5035947163899739
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,4,2,128,1,float16,float16,0,0.29765866200129193
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,fp8,0,0.13050666451454163
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,4,1,128,1,float16,float16,0,0.2624693314234416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,4,4,128,1,float16,float16,0,0.18174399932225546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,fp8,0,0.32153600454330444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,4,4,128,1,float16,float16,0,0.40782399972279865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,fp8,0,0.08799466490745544
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,4,1,128,1,float16,float16,0,0.14749866724014282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,4,2,128,1,float16,float16,0,0.16241600116093954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.3582506577173869
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,fp8,0,0.20542933543523154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,4,4,128,1,float16,float16,0,0.10599999626477559
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,fp8,0,0.057087997595469155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.21753066778182983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,4,2,128,1,float16,float16,0,0.09604266285896301
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,4,4,128,1,float16,float16,0,0.17773334185282388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,4,1,128,1,float16,float16,0,0.08912533521652222
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,4,4,128,1,float16,float16,0,0.10620799660682678
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,fp8,0,0.15195733308792114
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.3613813320795695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,4,4,128,1,float16,float16,0,0.06000000238418579
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.2155253291130066
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.707530657450358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,4,1,128,1,float16,float16,0,0.05042133231957754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.15493333339691162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,4,2,128,1,float16,float16,0,0.05386666456858317
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,fp8,0,0.053269331653912864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,fp8,0,0.09683199723561604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,4,4,128,1,float16,float16,0,0.060218666990598045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,4,4,128,1,float16,float16,0,0.04053866614898046
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,4,1,128,1,float16,float16,0,0.0366239994764328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,fp8,0,0.0509493350982666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.0981119970480601
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.15610133608182272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,4,4,128,1,float16,float16,0,0.04058666775623957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,4,2,128,1,float16,float16,0,0.0376800000667572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,fp8,0,0.0928106705347697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.09790933132171631
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,4,1,128,1,float16,float16,0,0.508784015973409
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,4,1,128,1,float16,float16,0,1.0599359671274822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,fp8,0,0.3413013219833374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,fp8,0,0.8251573244730631
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,4,4,128,1,float16,float16,0,0.8740693728129069
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,1.3856800397237141
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,4,2,128,1,float16,float16,0,1.247429370880127
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,fp8,0,1.6520800590515137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,4,2,128,1,float16,float16,0,0.6488000154495239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,fp8,0,0.7492372989654541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,4,4,128,1,float16,float16,0,0.4545546770095825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,1.3722772598266602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.7327253023783366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,4,4,128,1,float16,float16,0,0.45114131768544513
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,fp8,0,0.359279990196228
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,4,2,128,1,float16,float16,0,0.31618666648864746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,4,4,128,1,float16,float16,0,0.20973332722981772
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.3500320116678874
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.7258186340332031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,4,1,128,1,float16,float16,0,0.13506666819254556
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,4,2,128,1,float16,float16,0,0.15943466623624167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,fp8,0,0.0711839993794759
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,4,1,128,1,float16,float16,0,0.25659199555714923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,fp8,0,0.19129067659378052
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,fp8,0,0.12878933548927307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,4,4,128,1,float16,float16,0,0.10533333818117778
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.19803200165430704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.3470613161722819
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,4,4,128,1,float16,float16,0,0.8689760367075602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,4,1,128,1,float16,float16,0,0.08231466511885326
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,fp8,0,0.04613333443800608
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,fp8,0,0.11806933085123698
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,4,4,128,1,float16,float16,0,0.10493333141009013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,4,2,128,1,float16,float16,0,0.09117333094278972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,4,4,128,1,float16,float16,0,0.19506667057673135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.1968160072962443
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,4,1,128,1,float16,float16,0,0.045653333266576133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.12130666772524516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,fp8,0,0.04104000081618627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,4,2,128,1,float16,float16,0,0.05078400174776713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,fp8,0,0.07213866710662842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,4,4,128,1,float16,float16,0,0.033439998825391136
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.12257599830627441
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,fp8,0,0.03806933263937632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,4,2,128,1,float16,float16,0,0.02951466788848241
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,4,1,128,1,float16,float16,0,0.02773866554101308
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,4,4,128,1,float16,float16,0,0.05816533168156942
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.07382399837176006
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,4,4,128,1,float16,float16,0,0.03340800106525421
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,4,4,128,1,float16,float16,0,0.028938665986061096
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,fp8,0,0.06660266717274983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.07364266614119212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,fp8,0,0.04500266909599304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,4,2,128,1,float16,float16,0,0.026975999275843304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.0496373325586319
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,4,1,128,1,float16,float16,0,0.026170666019121807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,fp8,0,0.046800002455711365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.04976533353328705
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,4,4,128,1,float16,float16,0,0.02887466549873352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,4,4,128,1,float16,float16,0,0.05851200222969055
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,fp8,0,0.28279467423756915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,4,1,128,1,float16,float16,0,0.34994133313496906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.9907413323720297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,4,1,128,1,float16,float16,0,0.9534560044606527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,4,2,128,1,float16,float16,0,0.5046986738840739
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,fp8,0,0.549727996190389
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,4,4,128,1,float16,float16,0,0.8184373378753662
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,fp8,0,0.7827093601226807
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,4,2,128,1,float16,float16,0,1.1426400343577068
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,4,4,128,1,float16,float16,0,0.39296531677246094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,4,4,128,1,float16,float16,0,0.8139146963755289
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.5189173221588135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,fp8,0,1.2510080337524414
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,4,1,128,1,float16,float16,0,0.17640533049901327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.9796906312306722
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.24234133958816528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,fp8,0,0.25387199719746906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.5253013372421265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,fp8,0,0.097653329372406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,4,4,128,1,float16,float16,0,0.3902346690495809
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,fp8,0,0.048672000567118325
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,4,2,128,1,float16,float16,0,0.10518399874369304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,fp8,0,0.12486400206883748
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,4,4,128,1,float16,float16,0,0.07384000221888225
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,4,2,128,1,float16,float16,0,0.2346400022506714
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.24129066864649454
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.14114666978518167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,4,4,128,1,float16,float16,0,0.14989333351453146
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,4,1,128,1,float16,float16,0,0.08721066514650981
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,4,4,128,1,float16,float16,0,0.14151466886202493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,4,1,128,1,float16,float16,0,0.04878933231035868
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,fp8,0,0.06679999828338623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,4,2,128,1,float16,float16,0,0.057248001297314964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.13923199971516928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,4,4,128,1,float16,float16,0,0.07207466661930084
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.0734986662864685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,fp8,0,0.0330079992612203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,4,4,128,1,float16,float16,0,0.04138133426507314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,4,1,128,1,float16,float16,0,0.029845332105954487
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,fp8,0,0.027717334528764088
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,4,4,128,1,float16,float16,0,0.024170666933059692
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.07619733115037282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.046896000703175865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,4,4,128,1,float16,float16,0,0.04130133241415024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,fp8,0,0.045194665590922035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,4,2,128,1,float16,float16,0,0.03377600014209747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,4,1,128,1,float16,float16,0,0.018346666047970455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,fp8,0,0.02502399931351344
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,4,2,128,1,float16,float16,0,0.020266667008399963
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,fp8,0,0.04015466570854187
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.046944002310434975
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,4,4,128,1,float16,float16,0,0.024293333292007446
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,4,4,128,1,float16,float16,0,0.019600000232458115
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.03616533428430557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,4,1,128,1,float16,float16,0,0.01676799977819125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,4,2,128,1,float16,float16,0,0.017632000148296356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,fp8,0,0.031727999448776245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,fp8,0,0.0335413341720899
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,4,4,128,1,float16,float16,0,0.01960533360640208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.03622400015592575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,fp8,0,0.021525333325068157
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,4,1,128,1,float16,float16,0,0.016255999604860943
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,4,2,128,1,float16,float16,0,0.01632000009218852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,fp8,0,0.02179199953873952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,4,4,128,1,float16,float16,0,0.017360000560681026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,4,4,128,1,float16,float16,0,0.017450666675964992
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.023813332120577495
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.02385066697994868
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,4,1,128,1,float16,float16,0,0.12273066242535909
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,4,4,128,1,float16,float16,0,0.3784853219985962
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.4273066520690918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,fp8,0,0.07689066727956136
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,4,2,128,1,float16,float16,0,0.19036267201105753
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,4,1,128,1,float16,float16,0,0.2826293309529622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,fp8,0,0.2748960057894389
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,4,2,128,1,float16,float16,0,0.4721173445383708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,fp8,0,0.47310932477315265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,4,4,128,1,float16,float16,0,0.11707733074824016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,4,4,128,1,float16,float16,0,0.4052799940109253
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.18398932615915933
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.41342933972676593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,fp8,0,0.19395200411478677
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,4,2,128,1,float16,float16,0,0.07977599898974101
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,4,1,128,1,float16,float16,0,0.0641599992911021
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.18434667587280273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,fp8,0,0.03890133400758108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,4,1,128,1,float16,float16,0,0.03616533428430557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,4,4,128,1,float16,float16,0,0.13264532883961996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,fp8,0,0.026250667870044708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,fp8,0,0.09179199735323589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.1037600040435791
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,4,2,128,1,float16,float16,0,0.043935999274253845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,4,4,128,1,float16,float16,0,0.05916266640027364
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,fp8,0,0.046869332591692604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,4,4,128,1,float16,float16,0,0.03345066557327906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,4,4,128,1,float16,float16,0,0.059194669127464294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,fp8,0,0.02109333376089732
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.05026133358478546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.10363733768463135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,4,1,128,1,float16,float16,0,0.02216533323129018
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,4,2,128,1,float16,float16,0,0.026005332668622334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,4,4,128,1,float16,float16,0,0.033520000676314034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,fp8,0,0.03180266668399175
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.05013866722583771
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,4,4,128,1,float16,float16,0,0.01926933353145917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.03356266766786575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,4,2,128,1,float16,float16,0,0.01553600033124288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,fp8,0,0.0183999997874101
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,4,4,128,1,float16,float16,0,0.019205333044131596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,4,4,128,1,float16,float16,0,0.014720000326633453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,fp8,0,0.026837334036827087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.03356266766786575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.029792000850041706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,4,1,128,1,float16,float16,0,0.012042666474978128
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,4,1,128,1,float16,float16,0,0.01369599997997284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,fp8,0,0.025455998877684276
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,fp8,0,0.026591998835404713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,4,4,128,1,float16,float16,0,0.014959999670584997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,4,2,128,1,float16,float16,0,0.012736000120639801
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.029898665845394135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,4,1,128,1,float16,float16,0,0.011407999942700068
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.02346133440732956
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,4,4,128,1,float16,float16,0,0.012458667159080505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,fp8,0,0.021759999295075733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,4,2,128,1,float16,float16,0,0.011498666057984034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,4,4,128,1,float16,float16,0,0.012613333761692047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,4,4,128,1,float16,float16,0,0.011493333925803503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.0232640008131663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,fp8,0,0.02160000056028366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,4,1,128,1,float16,float16,0,0.011087999989589056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.019909333437681198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,fp8,0,0.019610666980346043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,4,2,128,1,float16,float16,0,0.011183999478816986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,fp8,0,0.019600000232458115
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.019600000232458115
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,4,4,128,1,float16,float16,0,0.011674666156371435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,4,1,128,1,float16,float16,0,0.05230399966239929
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,4,4,128,1,float16,float16,0,0.12476799885431926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.15709867080052695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,4,1,128,1,float16,float16,0,0.10598933696746826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,fp8,0,0.06449600060780843
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,4,2,128,1,float16,float16,0,0.17866667111714682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,fp8,0,0.17217600345611572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,fp8,0,0.03579200059175491
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,fp8,0,0.06800533334414165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,4,2,128,1,float16,float16,0,0.06874666611353557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,4,4,128,1,float16,float16,0,0.05313600103060404
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,4,4,128,1,float16,float16,0,0.1132586697737376
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.07981333136558533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,4,1,128,1,float16,float16,0,0.029968000948429108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.1566933294137319
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,fp8,0,0.023285334308942158
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,4,2,128,1,float16,float16,0,0.037920000652472176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,4,4,128,1,float16,float16,0,0.02943466603755951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,4,4,128,1,float16,float16,0,0.05297600229581197
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,fp8,0,0.03923733284076055
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.07979199786980946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.043194666504859924
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,fp8,0,0.01773333301146825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,4,1,128,1,float16,float16,0,0.017994667092959087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,4,2,128,1,float16,float16,0,0.02184533327817917
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,fp8,0,0.02495466669400533
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,4,4,128,1,float16,float16,0,0.016864000509182613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,4,4,128,1,float16,float16,0,0.02924266705910365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.026741333305835724
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.043434664607048035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,4,1,128,1,float16,float16,0,0.011274666835864386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,4,2,128,1,float16,float16,0,0.012863999853531519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,fp8,0,0.0200853335360686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,4,4,128,1,float16,float16,0,0.016864000509182613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,fp8,0,0.015168000012636185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,4,4,128,1,float16,float16,0,0.012533333152532578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.02677333354949951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.021189334491888683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,fp8,0,0.013674666484196981
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,4,1,128,1,float16,float16,0,0.009765333185593287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,4,2,128,1,float16,float16,0,0.010485333700974783
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,4,4,128,1,float16,float16,0,0.012527999778588613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.021087999145189922
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,4,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,fp8,0,0.01785600061217944
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.01785600061217944
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,4,4,128,1,float16,float16,0,0.010058666889866194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,4,4,128,1,float16,float16,0,0.010037333394090334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,4,2,128,1,float16,float16,0,0.009018666421373686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,fp8,0,0.016490666816631954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.018090666582187016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,4,1,128,1,float16,float16,0,0.008661333471536636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.016549333930015564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,4,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,fp8,0,0.012752000242471695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,4,2,128,1,float16,float16,0,0.008821333448092142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,4,4,128,1,float16,float16,0,0.00892800030608972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,fp8,0,0.015989333391189575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,4,4,128,1,float16,float16,0,0.00860799973209699
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,4,1,128,1,float16,float16,0,0.008565333361426989
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.016447999825080235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.016202667107184727
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,fp8,0,0.012554666648308435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,4,2,128,1,float16,float16,0,0.008613333106040955
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,fp8,0,0.01589866727590561
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.016058667252461117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,4,4,128,1,float16,float16,0,0.008698666468262672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,4,4,128,1,float16,float16,0,0.051034669081370033
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,4,1,128,1,float16,float16,0,0.027818667391935985
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.06751466790835063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,fp8,0,0.023029332359631855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,4,1,128,1,float16,float16,0,0.0496373325586319
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,fp8,0,0.03528533379236857
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,4,2,128,1,float16,float16,0,0.06597333153088887
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,fp8,0,0.06029333174228668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,fp8,0,0.03397866586844126
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,4,2,128,1,float16,float16,0,0.035936000446478523
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,4,4,128,1,float16,float16,0,0.05109866460164388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.06734399994214375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,4,4,128,1,float16,float16,0,0.02805333336194356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.03666666646798452
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,fp8,0,0.01786133274435997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,4,2,128,1,float16,float16,0,0.02045866722861926
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,4,1,128,1,float16,float16,0,0.01653333380818367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,fp8,0,0.02257599929968516
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,4,4,128,1,float16,float16,0,0.028010666370391846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,4,4,128,1,float16,float16,0,0.016309333344300587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.03669333209594091
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,4,1,128,1,float16,float16,0,0.010709332923094431
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.02327466756105423
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,4,2,128,1,float16,float16,0,0.012479999413092932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,4,4,128,1,float16,float16,0,0.01624533285697301
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,fp8,0,0.016965333372354507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.023013333479563396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,4,4,128,1,float16,float16,0,0.012191999703645706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,fp8,0,0.013653332988421122
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.017082666357358296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,4,2,128,1,float16,float16,0,0.01020800011853377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,fp8,0,0.014597332725922266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,4,1,128,1,float16,float16,0,0.009343999748428663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,4,4,128,1,float16,float16,0,0.012272000312805176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,4,4,128,1,float16,float16,0,0.010079999764760336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,4,1,128,1,float16,float16,0,0.008799999952316284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.01413333291808764
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,4,2,128,1,float16,float16,0,0.009050666665037474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,4,4,128,1,float16,float16,0,0.008709333216150602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,4,4,128,1,float16,float16,0,0.01002133327225844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.012741333494583765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.0141546664138635
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,4,1,128,1,float16,float16,0,0.008512000242869059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,fp8,0,0.012703999876976013
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,4,2,128,1,float16,float16,0,0.008485333373149237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.012437333663304647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,4,1,128,1,float16,float16,0,0.008346666892369589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,fp8,0,0.012432000289360682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,4,2,128,1,float16,float16,0,0.008453333129485449
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,4,4,128,1,float16,float16,0,0.008549333239595095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,4,4,128,1,float16,float16,0,0.00878399983048439
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,fp8,0,0.012538666526476542
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,4,4,128,1,float16,float16,0,0.008474666625261307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.012367999802033106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.012330666184425354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,4,1,128,1,float16,float16,0,0.008314666648705801
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,4,2,128,1,float16,float16,0,0.008272000278035799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,4,4,128,1,float16,float16,0,0.008314666648705801
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,fp8,0,0.012351999680201212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,4,4,128,1,float16,float16,0,0.008346666892369589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,fp8,0,0.012330666184425354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.012346666306257248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,4,4,128,1,float16,float16,0,0.03458133339881897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,4,1,128,1,float16,float16,0,0.023413332800070446
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.05146133402983347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,4,1,128,1,float16,float16,0,0.041759997606277466
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,fp8,0,0.028229333460330963
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,4,2,128,1,float16,float16,0,0.049829334020614624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,fp8,0,0.049786667029062905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,fp8,0,0.021759999295075733
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,4,2,128,1,float16,float16,0,0.027232001225153606
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,fp8,0,0.027306665976842243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,4,4,128,1,float16,float16,0,0.01970133309563001
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.02824000020821889
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,4,1,128,1,float16,float16,0,0.014090667168299357
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,4,4,128,1,float16,float16,0,0.03477866699298223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,fp8,0,0.01893866683046023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.05143466591835022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,4,2,128,1,float16,float16,0,0.015856000284353893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,4,4,128,1,float16,float16,0,0.019674666225910187
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,fp8,0,0.021194666624069214
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,fp8,0,0.017103999853134155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.02128000060717265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,4,4,128,1,float16,float16,0,0.0120319997270902
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.028218666712443035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,4,1,128,1,float16,float16,0,0.00919999989370505
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,fp8,0,0.018437333405017853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,4,4,128,1,float16,float16,0,0.009658666948477427
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,4,1,128,1,float16,float16,0,0.00877333308259646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,4,2,128,1,float16,float16,0,0.009925333162148794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.01825599993268649
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.021231998999913532
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,fp8,0,0.01762666677435239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,4,4,128,1,float16,float16,0,0.011946666985750198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,4,2,128,1,float16,float16,0,0.008799999952316284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,4,4,128,1,float16,float16,0,0.009648000200589498
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,4,4,128,1,float16,float16,0,0.008693333094318708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,fp8,0,0.016682667036851246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.018197332819302876
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.016538667182127636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,4,1,128,1,float16,float16,0,0.008330666770537695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,4,2,128,1,float16,float16,0,0.00842666688064734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,4,4,128,1,float16,float16,0,0.008650666723648706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.016399999459584553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,4,1,128,1,float16,float16,0,0.008122666428486506
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,4,2,128,1,float16,float16,0,0.008112000301480293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.017103999853134155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,fp8,0,0.01699200024207433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,4,4,128,1,float16,float16,0,0.008143999924262365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,fp8,0,0.01701333373785019
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,4,4,128,1,float16,float16,0,0.008314666648705801
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,4,4,128,1,float16,float16,0,0.008026666939258575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,4,1,128,1,float16,float16,0,0.00797333319981893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.01701333373785019
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,4,2,128,1,float16,float16,0,0.008090666805704435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,fp8,0,0.016442666451136272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.01687466725707054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,fp8,0,0.016885332763195038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,4,4,128,1,float16,float16,0,0.008010666817426682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,4,1,128,1,float16,float16,0,0.00789866658548514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.016741332908471424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,fp8,0,0.0163680004576842
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.016837333639462788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,4,2,128,1,float16,float16,0,0.008010666817426682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,4,4,128,1,float16,float16,0,0.00808533343176047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,fp8,0,0.01651200031240781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,4,4,128,1,float16,float16,0,0.00795199970404307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.01684800038735072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,4,4,128,1,float16,float16,0,0.026863999664783478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.03990400085846583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,4,1,128,1,float16,float16,0,0.021295999487241108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,4,1,128,1,float16,float16,0,0.03759466608365377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,4,2,128,1,float16,float16,0,0.04134399940570196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,fp8,0,0.023584000766277313
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,fp8,0,0.039077334105968475
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,4,2,128,1,float16,float16,0,0.022858666876951855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,fp8,0,0.021898667017618816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,fp8,0,0.01945066700379054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,4,4,128,1,float16,float16,0,0.015583999454975128
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.02248000105222066
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,4,4,128,1,float16,float16,0,0.026869334280490875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,4,1,128,1,float16,float16,0,0.01267733300725619
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,4,2,128,1,float16,float16,0,0.013541333377361298
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,fp8,0,0.01722666621208191
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.03979733337958654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,fp8,0,0.018474667022625606
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,4,4,128,1,float16,float16,0,0.009706666693091393
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,4,4,128,1,float16,float16,0,0.01551466683546702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,4,1,128,1,float16,float16,0,0.008623999853928884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.019461333751678467
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,fp8,0,0.01666133354107539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.022490667800108593
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,4,2,128,1,float16,float16,0,0.008687999720374743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,fp8,0,0.016602666427691776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,4,4,128,1,float16,float16,0,0.008442666381597519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,4,4,128,1,float16,float16,0,0.009765333185593287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.018378666291634243
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.016607999801635742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,4,2,128,1,float16,float16,0,0.008293333152929941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,4,1,128,1,float16,float16,0,0.008336000144481659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,fp8,0,0.01628799984852473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,4,4,128,1,float16,float16,0,0.008592000231146812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,4,4,128,1,float16,float16,0,0.008256000156203905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,4,1,128,1,float16,float16,0,0.008074666683872541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.016496000190575916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.016341333587964375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,fp8,0,0.016906666258970898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,4,2,128,1,float16,float16,0,0.008127999802430471
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,4,4,128,1,float16,float16,0,0.008229333286484083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,fp8,0,0.01692266638080279
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,4,4,128,1,float16,float16,0,0.007967999825874964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.016095999628305435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.016965333372354507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,4,1,128,1,float16,float16,0,0.007807999849319458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,fp8,0,0.016858667135238647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,4,2,128,1,float16,float16,0,0.007946666950980822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,4,4,128,1,float16,float16,0,0.008143999924262365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,fp8,0,0.01693333312869072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,4,4,128,1,float16,float16,0,0.007914666707317034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.016885332763195038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,4,1,128,1,float16,float16,0,0.007818666597207388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.016762666404247284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,fp8,0,0.016858667135238647
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,4,2,128,1,float16,float16,0,0.00784533346692721
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,4,4,128,1,float16,float16,0,0.007877333089709282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,fp8,0,0.016800000021855038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,4,4,128,1,float16,float16,0,0.007925333455204964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,4,1,128,1,float16,float16,0,0.007861333588759104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.016800000021855038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.01674666628241539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,fp8,0,0.016517333686351776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,4,2,128,1,float16,float16,0,0.007829333345095316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,fp8,0,0.016384000579516094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,4,4,128,1,float16,float16,0,0.007840000092983246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.016869333883126576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,4,4,128,1,float16,float16,0,0.01883200059334437
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.019808000574509304
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,0,0.011786667009194693
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,4,1,128,1,float16,float16,0,0.009381333366036415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,4,1,128,1,float16,float16,0,0.012986666212479273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,4,2,128,1,float16,float16,0,0.02405333270629247
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,0,0.014090667168299357
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,0,0.020101333657900494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,4,2,128,1,float16,float16,0,0.016544000556071598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,0,0.013744000345468521
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,4,4,128,1,float16,float16,0,0.01878400022784869
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,4,4,128,1,float16,float16,0,0.011551999797423681
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,4,1,128,1,float16,float16,0,0.008922666932145754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.013621332744757334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.019904000063737232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,0,0.011338666081428528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,4,2,128,1,float16,float16,0,0.012639999389648438
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,4,4,128,1,float16,float16,0,0.008031999692320824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,4,4,128,1,float16,float16,0,0.011653333902359009
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,0,0.011519999553759893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.011616000284751257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.013647999614477158
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,4,1,128,1,float16,float16,0,0.008799999952316284
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,0,0.01108266661564509
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,4,2,128,1,float16,float16,0,0.012383999923865
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,4,4,128,1,float16,float16,0,0.00797333319981893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,4,4,128,1,float16,float16,0,0.00784533346692721
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.011488000551859537
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,4,1,128,1,float16,float16,0,0.008687999720374743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,0,0.01108266661564509
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,4,2,128,1,float16,float16,0,0.01219733307758967
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,4,4,128,1,float16,float16,0,0.007823999971151352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.011247999966144562
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,4,1,128,1,float16,float16,0,0.008746666833758354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,4,4,128,1,float16,float16,0,0.007823999971151352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,0,0.011173332730929056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,4,2,128,1,float16,float16,0,0.01210133358836174
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,4,4,128,1,float16,float16,0,0.007770666852593422
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,4,4,128,1,float16,float16,0,0.007690666864315669
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,4,1,128,1,float16,float16,0,0.008623999853928884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.011050666371981302
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,0,0.010847999403874079
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,4,2,128,1,float16,float16,0,0.011973333855470022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.011221333096424738
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,4,4,128,1,float16,float16,0,0.007760000104705493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,4,4,128,1,float16,float16,0,0.007530666887760162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,4,1,128,1,float16,float16,0,0.008559999987483025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.010735999792814255
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,4,2,128,1,float16,float16,0,0.012053333222866058
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,4,4,128,1,float16,float16,0,0.007621333623925845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,4,4,128,1,float16,float16,0,0.007600000128149986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.010832000523805618
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,4,1,128,1,float16,float16,0,0.00850133349498113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,4,2,128,1,float16,float16,0,0.008400000010927519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,4,4,128,1,float16,float16,0,0.007765333478649457
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,2,2,128,1,float16,float16,0,2.8656746546427407
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,2,1,128,1,float16,float16,0,2.90554141998291
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,2,2,128,1,float16,float16,0,2.964831988016764
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,fp8,0,3.547823905944824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,2,2,128,1,float16,float16,0,1.5318934122721355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,2,1,128,1,float16,float16,0,1.4114185969034831
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16384,2,1,128,1,float16,float16,0,6.050352096557617
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,6.798032124837239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,fp8,0,6.6185760498046875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,3.417738596598307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,2,2,128,1,float16,float16,0,1.5225760142008464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,fp8,0,2.179962635040283
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,2,1,128,1,float16,float16,0,0.8658080101013184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,2,2,128,1,float16,float16,0,0.8137653668721517
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,2.191002686818441
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,2,2,128,1,float16,float16,0,0.851914644241333
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,fp8,0,1.2926666736602783
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,6.7121225992838545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,3.679509480794271
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,1.8397386868794758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,2,2,128,1,float16,float16,0,1.7239413261413574
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,2,1,128,1,float16,float16,0,1.5569920539855957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,12288,2,1,128,1,float16,float16,0,3.0658187866210938
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,2,2,128,1,float16,float16,0,1.801967938741048
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,fp8,0,3.8798774083455405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,2,2,128,1,float16,float16,0,0.904911994934082
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,fp8,0,2.2221973737080893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,3.6098454793294272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,2,1,128,1,float16,float16,0,1.0287360350290935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,1.9637707074483235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,2,2,128,1,float16,float16,0,0.5390453338623047
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,2,2,128,1,float16,float16,0,0.9664159615834554
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,3.602725346883138
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,2,1,128,1,float16,float16,0,0.5615679820378622
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,fp8,0,1.3858613967895508
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,1.1660160223642986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,2,2,128,1,float16,float16,0,0.5195839802424113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,1.972815990447998
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,fp8,0,0.8313173453013102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,1.2980159918467205
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,2,2,128,1,float16,float16,0,1.2034986813863118
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,2,1,128,1,float16,float16,0,1.1371306578318279
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,2,2,128,1,float16,float16,0,1.2489226659138997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,10240,2,1,128,1,float16,float16,0,2.2316320737202964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,fp8,0,1.553210735321045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,fp8,0,2.7560907999674478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,2.5885866483052573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,2,2,128,1,float16,float16,0,0.636522650718689
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,2,1,128,1,float16,float16,0,0.7148106892903646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,2,2,128,1,float16,float16,0,0.6832373142242432
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,fp8,0,1.0366240342458088
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,1.4910027186075847
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,2,2,128,1,float16,float16,0,0.3900853395462036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,2.5939787228902182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,2,1,128,1,float16,float16,0,0.392522652943929
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,2,2,128,1,float16,float16,0,0.3950666586558024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,1.607957363128662
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,fp8,0,0.6190346479415894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.8612426916758219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.7947946389516195
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,2,2,128,1,float16,float16,0,1.6663840611775715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,2,1,128,1,float16,float16,0,1.475503921508789
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,8192,2,1,128,1,float16,float16,0,2.939061482747396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,fp8,0,1.890394687652588
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,2,2,128,1,float16,float16,0,1.681418736775716
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,2,2,128,1,float16,float16,0,0.7947893142700195
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,fp8,0,3.369130770365397
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,3.3015785217285156
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,2,1,128,1,float16,float16,0,0.7617759704589844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,fp8,0,1.0764479637145996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,2,2,128,1,float16,float16,0,0.49354668458302814
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,2,2,128,1,float16,float16,0,0.7889706293741862
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,1.936794598897298
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,2,1,128,1,float16,float16,0,0.4402346611022949
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.9924266338348389
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,2,2,128,1,float16,float16,0,0.44223467508951825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,3.329375902811686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,1.7429386774698894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,fp8,0,0.7842613061269125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,2,2,128,1,float16,float16,0,0.2820746699968974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,1.1192533175150554
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,2,2,128,1,float16,float16,0,0.2878879904747009
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,2,1,128,1,float16,float16,0,0.27737067143122357
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,fp8,0,0.5338240067164103
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.6881813208262125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.7077066898345947
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,2,2,128,1,float16,float16,0,0.9545653661092123
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,2,1,128,1,float16,float16,0,0.9143466949462891
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,fp8,0,1.097493330637614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,2,2,128,1,float16,float16,0,0.9632960160573324
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,6144,2,1,128,1,float16,float16,0,1.7916852633158367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,fp8,0,1.999077320098877
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,1.9042666753133137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,2,2,128,1,float16,float16,0,0.5305013259251913
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,2,1,128,1,float16,float16,0,0.47150933742523193
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,fp8,0,0.694159984588623
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,2,2,128,1,float16,float16,0,0.5079786777496338
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,1.0437599817911785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,2,2,128,1,float16,float16,0,0.2852960030237834
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,1.91318941116333
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,2,1,128,1,float16,float16,0,0.2943306763966878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,2,2,128,1,float16,float16,0,0.3006613254547119
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,2,2,128,1,float16,float16,0,0.18403732776641846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,1.0902079741160076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.6440320014953613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,fp8,0,0.45898667971293133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.38765867551167804
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.6520693302154541
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,2,1,128,1,float16,float16,0,0.18238399426142374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,2,2,128,1,float16,float16,0,0.18397333224614462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,fp8,0,0.3635093371073405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.3883039951324463
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,2,2,128,1,float16,float16,0,0.9312907059987386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,2,2,128,1,float16,float16,0,0.9350240230560303
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,2,1,128,1,float16,float16,0,0.7955786387125651
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,1.9074880282084148
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,fp8,0,1.0038452943166096
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,2,2,128,1,float16,float16,0,0.43057600657145184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,1.7525653839111328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,2,1,128,1,float16,float16,0,0.4123733441034953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.9164213339487711
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,4096,2,1,128,1,float16,float16,0,1.7000746726989746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,2,2,128,1,float16,float16,0,0.46482666333516437
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,2,2,128,1,float16,float16,0,0.25966399908065796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,fp8,0,0.6414773464202881
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,fp8,0,1.937104066212972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.8911840120951334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,2,1,128,1,float16,float16,0,0.2437386711438497
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.5719626744588217
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,2,2,128,1,float16,float16,0,0.25411200523376465
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.5694133440653483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,fp8,0,0.23490132888158163
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,2,1,128,1,float16,float16,0,0.155349334081014
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,2,2,128,1,float16,float16,0,0.1625173290570577
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,2,2,128,1,float16,float16,0,0.16865599155426025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.334330677986145
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,2,2,128,1,float16,float16,0,0.09613333145777385
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.23514666159947714
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,fp8,0,0.3633439938227336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.3413013219833374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,2,2,128,1,float16,float16,0,0.09728533029556274
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,2,1,128,1,float16,float16,0,0.08829333384831746
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.23545066515604654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,fp8,0,0.228277325630188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,2,1,128,1,float16,float16,0,0.4811946551005046
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,1.078874667485555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,2,2,128,1,float16,float16,0,0.5973333517710367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,2,2,128,1,float16,float16,0,0.5825279951095581
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,fp8,0,0.5720800161361694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,fp8,0,1.1820000012715657
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,2,1,128,1,float16,float16,0,0.2637866735458374
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,3072,2,1,128,1,float16,float16,0,0.9964799880981445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,2,2,128,1,float16,float16,0,0.29130132993062335
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.5851999918619791
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,fp8,0,0.377072016398112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,2,2,128,1,float16,float16,0,0.16817599534988403
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,1.072719971338908
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,2,1,128,1,float16,float16,0,0.15609066685040793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,fp8,0,0.2834293246269226
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.5756586790084839
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,2,2,128,1,float16,float16,0,0.1681386629740397
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,2,2,128,1,float16,float16,0,0.28201067447662354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.33160533507664997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.32846933603286743
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,2,2,128,1,float16,float16,0,0.1067733367284139
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.285317341486613
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,2,1,128,1,float16,float16,0,0.10341333349545796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,2,2,128,1,float16,float16,0,0.10787733395894368
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,fp8,0,0.17652799685796103
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.2856053312619527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,2,2,128,1,float16,float16,0,0.0689279983441035
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,2,2,128,1,float16,float16,0,0.06851733227570851
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.17685866355895996
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,fp8,0,0.17493865887324014
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,2,1,128,1,float16,float16,0,0.0662613312403361
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.17705066998799643
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,2,1,128,1,float16,float16,0,0.5016266504923502
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,fp8,0,0.5862559874852499
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,2,2,128,1,float16,float16,0,0.6184533437093099
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,2,2,128,1,float16,float16,0,0.6281013488769531
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,1.0567573706309001
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,2,2,128,1,float16,float16,0,0.29173866907755536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,2048,2,1,128,1,float16,float16,0,1.004149357477824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,fp8,0,1.1492106914520264
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,1.069269339243571
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.49820268154144287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,2,2,128,1,float16,float16,0,0.2855413357416789
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,2,1,128,1,float16,float16,0,0.24119999011357626
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,fp8,0,0.30822400252024335
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,fp8,0,0.18677866458892822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,2,1,128,1,float16,float16,0,0.1384213368097941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.5073973337809244
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.3173546592394511
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,2,2,128,1,float16,float16,0,0.15500266353289285
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,2,2,128,1,float16,float16,0,0.09866133332252502
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,2,2,128,1,float16,float16,0,0.15475733081499735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,2,1,128,1,float16,float16,0,0.09261332949002583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.31781333684921265
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.16014400124549866
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,2,2,128,1,float16,float16,0,0.05221866567929586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,fp8,0,0.12667733430862427
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,2,2,128,1,float16,float16,0,0.09940266609191895
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.15927466750144958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,2,1,128,1,float16,float16,0,0.04849066833655039
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.1267519990603129
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,fp8,0,0.12071999907493591
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.1267519990603129
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,fp8,0,0.11850666999816895
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,2,2,128,1,float16,float16,0,0.05242133140563965
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.12059733271598816
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,2,2,128,1,float16,float16,0,0.04744533201058706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,2,2,128,1,float16,float16,0,0.04744000236193339
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.1209279994169871
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,2,1,128,1,float16,float16,0,0.04553066690762838
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,2,1,128,1,float16,float16,0,0.28684266408284503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,fp8,0,0.3161333401997884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,2,2,128,1,float16,float16,0,0.175818661848704
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.32232532898585003
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,fp8,0,0.6998720169067383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,2,2,128,1,float16,float16,0,0.38596800963083905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,2,2,128,1,float16,float16,0,0.40118932723999023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1536,2,1,128,1,float16,float16,0,0.6186986764272054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.6570080121358236
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.6534880002339681
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,2,1,128,1,float16,float16,0,0.1586026648680369
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,fp8,0,0.20366400480270386
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,2,2,128,1,float16,float16,0,0.17524800697962442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,2,2,128,1,float16,float16,0,0.10585600137710571
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,2,2,128,1,float16,float16,0,0.10652800401051839
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.20908266305923462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.3221973379453023
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,2,2,128,1,float16,float16,0,0.05961599946022034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,2,1,128,1,float16,float16,0,0.09338133533795674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.15150400002797446
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.21188799540201822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,fp8,0,0.1497706671555837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,2,1,128,1,float16,float16,0,0.053770666321118675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.1520906686782837
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,fp8,0,0.09591466188430786
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,2,2,128,1,float16,float16,0,0.059621334075927734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.09604799747467041
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,2,2,128,1,float16,float16,0,0.04038399954636892
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,fp8,0,0.09283199906349182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.09618133306503296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,2,1,128,1,float16,float16,0,0.037861332297325134
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.09278399745623271
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,2,2,128,1,float16,float16,0,0.040261333187421165
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,2,2,128,1,float16,float16,0,0.03710933278004328
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,fp8,0,0.09036266803741455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,2,2,128,1,float16,float16,0,0.037050666908423104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,2,1,128,1,float16,float16,0,0.03600533306598663
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.09262933333714803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,2,1,128,1,float16,float16,0,0.3025226593017578
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,fp8,0,0.36137600739796955
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.675706704457601
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,2,2,128,1,float16,float16,0,0.4248373508453369
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,2,2,128,1,float16,float16,0,0.43774934609731037
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,2,2,128,1,float16,float16,0,0.19723200798034668
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.6652479966481527
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.3004320065180461
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1024,2,1,128,1,float16,float16,0,0.6461653312047323
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,fp8,0,0.7516319751739502
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,2,2,128,1,float16,float16,0,0.19669334093729654
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,2,1,128,1,float16,float16,0,0.14612799882888794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,fp8,0,0.18343466520309448
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.2990880012512207
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,2,2,128,1,float16,float16,0,0.10371200243631999
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,fp8,0,0.11496000488599141
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,2,2,128,1,float16,float16,0,0.10308800141016643
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.1897546648979187
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,2,2,128,1,float16,float16,0,0.05775466561317444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.11890133221944173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,2,1,128,1,float16,float16,0,0.04870399832725525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,fp8,0,0.07144533097743988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.18946133057276407
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,2,2,128,1,float16,float16,0,0.05712000032265981
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,2,1,128,1,float16,float16,0,0.0881119966506958
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.11667199929555257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.07144533097743988
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,2,2,128,1,float16,float16,0,0.033071999748547874
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,2,2,128,1,float16,float16,0,0.033039999504884086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,2,1,128,1,float16,float16,0,0.02943466603755951
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,fp8,0,0.06671999891599019
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.07132266461849213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,2,2,128,1,float16,float16,0,0.028698667883872986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.06682666639486949
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,fp8,0,0.06449600060780843
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,2,2,128,1,float16,float16,0,0.028783999383449554
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,2,1,128,1,float16,float16,0,0.027210667729377747
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.04310933252175649
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.06690666576226552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,fp8,0,0.04155199974775314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,2,2,128,1,float16,float16,0,0.026399999856948853
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,2,2,128,1,float16,float16,0,0.026538667579491932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,2,1,128,1,float16,float16,0,0.025775998830795288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.043322667479515076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,fp8,0,0.24938132365544638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,2,2,128,1,float16,float16,0,0.3678079843521118
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,2,1,128,1,float16,float16,0,0.21518399318059286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,2,2,128,1,float16,float16,0,0.36721599102020264
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.4630773464838664
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.46808000405629474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,2,2,128,1,float16,float16,0,0.14668800433476767
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,2,1,128,1,float16,float16,0,0.09749866525332133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.19835732380549112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,512,2,1,128,1,float16,float16,0,0.5611573457717896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,fp8,0,0.5568426847457886
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,2,2,128,1,float16,float16,0,0.07134933272997539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.1274133324623108
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.1979573369026184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,fp8,0,0.12290133039156596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,2,2,128,1,float16,float16,0,0.12173333764076233
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,2,1,128,1,float16,float16,0,0.05509866774082184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.12552533547083536
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,2,2,128,1,float16,float16,0,0.04050666590531667
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,2,1,128,1,float16,float16,0,0.032885332902272545
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,2,2,128,1,float16,float16,0,0.040789333482583366
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.06825066606203715
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,2,2,128,1,float16,float16,0,0.02362666775782903
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.06834133466084798
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,fp8,0,0.044490665197372437
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,2,2,128,1,float16,float16,0,0.07161599894364674
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,fp8,0,0.06508799890677135
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.04455466568470001
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,2,2,128,1,float16,float16,0,0.02369066576162974
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,fp8,0,0.04027199993530909
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.04438933233420054
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,2,1,128,1,float16,float16,0,0.020037333170572918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,2,2,128,1,float16,float16,0,0.01951466624935468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.03977599988381068
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,2,2,128,1,float16,float16,0,0.019434666881958645
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,fp8,0,0.038047999143600464
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,2,1,128,1,float16,float16,0,0.017957333475351334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.03990400085846583
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.029866665601730347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,2,1,128,1,float16,float16,0,0.016522667060295742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,fp8,0,0.028405333558718365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,2,2,128,1,float16,float16,0,0.0173333336909612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,2,2,128,1,float16,float16,0,0.017407999684413273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.02979733298222224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,2,2,128,1,float16,float16,0,0.016048000504573185
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.02029866725206375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,fp8,0,0.01932266727089882
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,2,2,128,1,float16,float16,0,0.016127999871969223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.019834666202465694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,2,1,128,1,float16,float16,0,0.016000000139077503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,2,1,128,1,float16,float16,0,0.07397866745789845
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,fp8,0,0.08814932902654012
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.14724266529083252
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,2,2,128,1,float16,float16,0,0.1202239990234375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,2,2,128,1,float16,float16,0,0.12957333525021872
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,2,2,128,1,float16,float16,0,0.05791999896367391
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,256,2,1,128,1,float16,float16,0,0.17822933197021484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.09097599983215332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,fp8,0,0.19091200828552246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,2,1,128,1,float16,float16,0,0.04186133543650309
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,fp8,0,0.044549331068992615
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.14777599771817526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,2,2,128,1,float16,float16,0,0.05794133245944977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.044394666949907936
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,2,2,128,1,float16,float16,0,0.03294399877389272
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,fp8,0,0.030847998956839245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,2,1,128,1,float16,float16,0,0.02478933334350586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.0446720023949941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.09186133742332458
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,2,2,128,1,float16,float16,0,0.03303466737270355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,2,2,128,1,float16,float16,0,0.019061333189407986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.031018666923046112
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,2,1,128,1,float16,float16,0,0.015103999525308609
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,fp8,0,0.026757332185904186
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,2,2,128,1,float16,float16,0,0.0189280000825723
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.031178665657838184
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.026730666557947796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,2,1,128,1,float16,float16,0,0.012879999975363413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,2,2,128,1,float16,float16,0,0.014602666099866232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,2,2,128,1,float16,float16,0,0.01462399959564209
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,fp8,0,0.024661332368850708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.026485333840052288
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.023178666830062866
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.023178666830062866
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,2,2,128,1,float16,float16,0,0.012517333030700684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,fp8,0,0.02179199953873952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,2,1,128,1,float16,float16,0,0.0116799995303154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,2,2,128,1,float16,float16,0,0.012517333030700684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.01979200045267741
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,2,1,128,1,float16,float16,0,0.011317333827416102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,fp8,0,0.019786667078733444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,2,2,128,1,float16,float16,0,0.011125333607196808
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,2,2,128,1,float16,float16,0,0.011429333438475927
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.019925333559513092
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,2,2,128,1,float16,float16,0,0.011407999942700068
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.019167999426523846
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,2,1,128,1,float16,float16,0,0.01110400011142095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,2,2,128,1,float16,float16,0,0.011141333729028702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,fp8,0,0.018981333822011948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.019071999937295914
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,2,2,128,1,float16,float16,0,0.05208000044027964
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,2,1,128,1,float16,float16,0,0.035455999275048576
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.06646933158238728
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,fp8,0,0.03712533414363861
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,2,2,128,1,float16,float16,0,0.05203733344872793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.06795733173688252
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,2,2,128,1,float16,float16,0,0.02865600089232127
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.037471999724706016
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,128,2,1,128,1,float16,float16,0,0.06319466729958852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,fp8,0,0.0240639994541804
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,2,1,128,1,float16,float16,0,0.02049066623051961
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,fp8,0,0.0647680014371872
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,2,2,128,1,float16,float16,0,0.02864533414443334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,2,2,128,1,float16,float16,0,0.016602666427691776
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.037445334096749626
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.024218666056791942
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,fp8,0,0.020128000527620316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,2,2,128,1,float16,float16,0,0.016399999459584553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.02439466615517934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.01998399943113327
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,2,1,128,1,float16,float16,0,0.012709333250919977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,2,2,128,1,float16,float16,0,0.012298667182525
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.01982933282852173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,fp8,0,0.017957333475351334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,2,2,128,1,float16,float16,0,0.01240533341964086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.017871999492247898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,2,1,128,1,float16,float16,0,0.00933333362142245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,fp8,0,0.01676799977819125
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,2,2,128,1,float16,float16,0,0.010079999764760336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,2,1,128,1,float16,float16,0,0.010565333068370819
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,2,2,128,1,float16,float16,0,0.010037333394090334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.016575999557971954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,2,1,128,1,float16,float16,0,0.00884799969693025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.01777600000301997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,fp8,0,0.0161920003592968
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,2,2,128,1,float16,float16,0,0.00903466654320558
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,2,2,128,1,float16,float16,0,0.00892800030608972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,2,2,128,1,float16,float16,0,0.008682666967312494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.01657066618402799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.016143999993801117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,2,1,128,1,float16,float16,0,0.008570666735370954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,fp8,0,0.01590399940808614
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,2,2,128,1,float16,float16,0,0.008522666369875273
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.01617066686352094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.01591466615597407
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,2,2,128,1,float16,float16,0,0.00860799973209699
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,fp8,0,0.01575999955336253
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,2,1,128,1,float16,float16,0,0.008613333106040955
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.015935999651749928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,2,2,128,1,float16,float16,0,0.008512000242869059
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,2,2,128,1,float16,float16,0,0.02743999908367793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,2,1,128,1,float16,float16,0,0.019333332777023315
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.03073599934577942
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,fp8,0,0.021583999196688335
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,2,2,128,1,float16,float16,0,0.027349332968393963
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,64,2,1,128,1,float16,float16,0,0.03332799921433131
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,fp8,0,0.03219199925661087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.0206986665725708
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,fp8,0,0.016906666258970898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,2,2,128,1,float16,float16,0,0.016000000139077503
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,2,1,128,1,float16,float16,0,0.012245333443085352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.030938667555650074
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,2,2,128,1,float16,float16,0,0.01201066623131434
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.016010666886965435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,2,2,128,1,float16,float16,0,0.0161013330022494
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,fp8,0,0.01461333284775416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,2,1,128,1,float16,float16,0,0.010362666721145311
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.02067199970285098
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,2,2,128,1,float16,float16,0,0.011994666109482447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.01598400001724561
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.01414399966597557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,2,2,128,1,float16,float16,0,0.0099093330403169
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,2,1,128,1,float16,float16,0,0.009152000149091085
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,fp8,0,0.013365333278973898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,2,2,128,1,float16,float16,0,0.01003200002014637
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.014080000420411428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,2,2,128,1,float16,float16,0,0.008805333326260248
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,2,1,128,1,float16,float16,0,0.008778666456540426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,2,2,128,1,float16,float16,0,0.00892800030608972
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.012400000045696894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,2,1,128,1,float16,float16,0,0.008485333373149237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,2,2,128,1,float16,float16,0,0.008447999755541483
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,fp8,0,0.012538666526476542
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.012602667013804117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,2,2,128,1,float16,float16,0,0.008474666625261307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.012159999459981918
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,2,1,128,1,float16,float16,0,0.008346666892369589
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,2,2,128,1,float16,float16,0,0.008410666758815447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,fp8,0,0.012378666549921036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,2,2,128,1,float16,float16,0,0.008250666782259941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.012261333564917246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,2,1,128,1,float16,float16,0,0.008127999802430471
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.012026666353146235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,2,2,128,1,float16,float16,0,0.008229333286484083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,fp8,0,0.012549333274364471
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,2,2,128,1,float16,float16,0,0.008272000278035799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.012256000190973282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,2,2,128,1,float16,float16,0,0.019226666539907455
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.025941332181294758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,2,1,128,1,float16,float16,0,0.015578666081031164
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,2,2,128,1,float16,float16,0,0.01930133377512296
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,fp8,0,0.02109866589307785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,2,2,128,1,float16,float16,0,0.011776000261306763
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,32,2,1,128,1,float16,float16,0,0.02593066543340683
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.02022933339079221
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,fp8,0,0.026704000929991405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,fp8,0,0.018511999398469925
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,2,1,128,1,float16,float16,0,0.0100426667680343
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.025744001070658367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,fp8,0,0.01699200024207433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,2,2,128,1,float16,float16,0,0.011754666765530905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,2,1,128,1,float16,float16,0,0.008879999940594038
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.018138666947682697
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.017935999979575474
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,2,2,128,1,float16,float16,0,0.009658666948477427
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,2,2,128,1,float16,float16,0,0.009765333185593287
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.020303999384244282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,2,1,128,1,float16,float16,0,0.00854399986565113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.01648533344268799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,2,2,128,1,float16,float16,0,0.008703999842206636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.016447999825080235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,2,2,128,1,float16,float16,0,0.008485333373149237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,2,2,128,1,float16,float16,0,0.008586666857202848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,fp8,0,0.01727466657757759
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,2,1,128,1,float16,float16,0,0.008367999767263731
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,fp8,0,0.016986666868130367
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,2,2,128,1,float16,float16,0,0.008336000144481659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,2,2,128,1,float16,float16,0,0.008165333420038223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,fp8,0,0.01693333312869072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.01682666689157486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.0169813334941864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,2,1,128,1,float16,float16,0,0.00816000004609426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,2,2,128,1,float16,float16,0,0.008010666817426682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.016842667013406754
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,2,2,128,1,float16,float16,0,0.008037333066264788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.016783999900023144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,fp8,0,0.016490666816631954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,2,1,128,1,float16,float16,0,0.00797333319981893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,2,2,128,1,float16,float16,0,0.007871999715765318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,2,2,128,1,float16,float16,0,0.007978666573762894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.016821333517630894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.016821333517630894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,2,1,128,1,float16,float16,0,0.008000000069538752
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,fp8,0,0.016496000190575916
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.016309333344300587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,2,2,128,1,float16,float16,0,0.008063999935984612
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,2,2,128,1,float16,float16,0,0.01540800059835116
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.021429332594076794
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,2,1,128,1,float16,float16,0,0.013770667215188345
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,fp8,0,0.018640000373125076
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,2,2,128,1,float16,float16,0,0.01540800059835116
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,16,2,1,128,1,float16,float16,0,0.022613334159056347
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,fp8,0,0.02202133337656657
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,2,2,128,1,float16,float16,0,0.009818666925032934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.0182239996890227
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,fp8,0,0.01692266638080279
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,2,1,128,1,float16,float16,0,0.008863999818762144
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.021381333470344543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,2,2,128,1,float16,float16,0,0.008565333361426989
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,2,2,128,1,float16,float16,0,0.009722666814923286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.01657066618402799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,2,1,128,1,float16,float16,0,0.008538666491707167
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,fp8,0,0.016410666207472484
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.018309333672126133
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,2,2,128,1,float16,float16,0,0.008165333420038223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,2,2,128,1,float16,float16,0,0.00854399986565113
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.016234666109085083
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,2,1,128,1,float16,float16,0,0.008218666538596153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.016447999825080235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,fp8,0,0.016917333006858826
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,2,2,128,1,float16,float16,0,0.010522666076819101
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.016127999871969223
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,2,1,128,1,float16,float16,0,0.007989333321650824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,fp8,0,0.016821333517630894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,2,2,128,1,float16,float16,0,0.008293333152929941
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,2,2,128,1,float16,float16,0,0.008031999692320824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,2,2,128,1,float16,float16,0,0.007823999971151352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.016735999534527462
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.01674666628241539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,fp8,0,0.016751999656359356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,2,2,128,1,float16,float16,0,0.007914666707317034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,2,2,128,1,float16,float16,0,0.007760000104705493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,2,1,128,1,float16,float16,0,0.00790933333337307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.01674666628241539
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.016805333395799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,fp8,0,0.01672533278663953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,2,2,128,1,float16,float16,0,0.007850666840871176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,2,1,128,1,float16,float16,0,0.007823999971151352
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.016607999801635742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.016384000579516094
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,2,1,128,1,float16,float16,0,0.007850666840871176
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,2,2,128,1,float16,float16,0,0.007882666463653246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,2,2,128,1,float16,float16,0,0.00789866658548514
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,fp8,0,0.01759999990463257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.016309333344300587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,2,1,128,1,float16,float16,0,0.009125333279371262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.013440000514189402
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,0,0.011551999797423681
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,2,2,128,1,float16,float16,0,0.011711999773979187
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,2,2,128,1,float16,float16,0,0.011589333415031433
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,256,1,2,1,128,1,float16,float16,0,0.012991999586423239
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,0,0.013738666971524557
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.011514666179815928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,2,1,128,1,float16,float16,0,0.008837333569924036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,2,2,128,1,float16,float16,0,0.00797333319981893
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.013701333353916803
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,2,1,128,1,float16,float16,0,0.008821333448092142
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,2,2,128,1,float16,float16,0,0.007754666730761528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.011450666934251785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,2,2,128,1,float16,float16,0,0.007903999959429106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,2,2,128,1,float16,float16,0,0.008240000034372011
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.011178666104873022
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,2,2,128,1,float16,float16,0,0.007802666475375493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.011152000476916632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,0,0.011322667201360067
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,2,1,128,1,float16,float16,0,0.00877333308259646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,2,2,128,1,float16,float16,0,0.007711999739209811
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,2,2,128,1,float16,float16,0,0.007573333258430163
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.011152000476916632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,2,1,128,1,float16,float16,0,0.008661333471536636
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,2,2,128,1,float16,float16,0,0.007658666620651881
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,2,2,128,1,float16,float16,0,0.00772266648709774
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.010805333654085795
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,2,2,128,1,float16,float16,0,0.007525333513816197
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,2,1,128,1,float16,float16,0,0.008602666358153025
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,2,2,128,1,float16,float16,0,0.0075573331365982694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,2,1,128,1,float16,float16,0,0.008597333605090777
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,0,0.010640000303586325
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,2,2,128,1,float16,float16,0,0.0074346667776505155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,2,1,128,1,float16,float16,0,0.008336000144481659
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.01119999960064888
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,2,2,128,1,float16,float16,0,0.00749333327015241
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.010687999427318573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,2,2,128,1,float16,float16,0,0.0075573331365982694
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,1,1,128,1,float16,float16,0,0.8118133544921875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16384,1,1,128,1,float16,float16,0,0.8209813435872396
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,1,1,128,1,float16,float16,0,1.4900320370992024
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16384,1,1,128,1,float16,float16,0,1.5119679768880208
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,1.7952052752176921
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,1,1,128,1,float16,float16,0,0.4941973288853963
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16384,1,1,128,1,float16,float16,0,0.5082666476567587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.9111733436584473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,3.1889867782592773
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,3.2452265421549478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,1.7835839589436848
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.9165439605712891
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,1,1,128,1,float16,float16,0,0.8733812967936198
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,1,1,128,1,float16,float16,0,0.5347146590550741
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,12288,1,1,128,1,float16,float16,0,0.518447995185852
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,12288,1,1,128,1,float16,float16,0,0.8685493469238281
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,1.1318506399790447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,1.9542773564656575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,1,1,128,1,float16,float16,0,0.3311466574668884
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,12288,1,1,128,1,float16,float16,0,0.3278613289197286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,1.1447306474049885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,1.9995147387186687
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.6784266630808512
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.6774720350901285
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,1,1,128,1,float16,float16,0,0.6324906746546427
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,10240,1,1,128,1,float16,float16,0,0.6328959862391154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,1,1,128,1,float16,float16,0,0.40488000710805255
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.7958239714304606
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,10240,1,1,128,1,float16,float16,0,0.3930879831314087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,1.495530605316162
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,1,1,128,1,float16,float16,0,0.24708799521128336
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,1.4508800506591797
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,10240,1,1,128,1,float16,float16,0,0.24460800488789877
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.5927146673202515
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.7853653430938721
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.5651626586914062
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,1,1,128,1,float16,float16,0,0.8425493240356445
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,8192,1,1,128,1,float16,float16,0,0.8224319616953532
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,1,1,128,1,float16,float16,0,0.4465706745783488
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.9984373251597086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,8192,1,1,128,1,float16,float16,0,0.4514666795730591
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,1.6794239679972331
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,1,1,128,1,float16,float16,0,0.2800053358078003
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,1.7607359886169434
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.6913866996765137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,1.0169706344604492
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,8192,1,1,128,1,float16,float16,0,0.28225600719451904
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,1,1,128,1,float16,float16,0,0.17735999822616577
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,8192,1,1,128,1,float16,float16,0,0.17537599802017212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.4480160077412923
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.6782826582590739
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.45227734247843426
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,1,1,128,1,float16,float16,0,0.46964800357818604
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,6144,1,1,128,1,float16,float16,0,0.5069226821263632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,1,1,128,1,float16,float16,0,0.283680001894633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,1.0079999764760335
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.6400959889094034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,1.037845293680827
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,1,1,128,1,float16,float16,0,0.18101332585016885
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,6144,1,1,128,1,float16,float16,0,0.28670400381088257
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.6410026550292969
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.36802132924397785
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,6144,1,1,128,1,float16,float16,0,0.18100800116856894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,1,1,128,1,float16,float16,0,0.1276533305644989
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.3675359884897868
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,6144,1,1,128,1,float16,float16,0,0.12527466813723245
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.3385386864344279
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.3374240001042684
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,1,1,128,1,float16,float16,0,0.2593119939168294
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,4096,1,1,128,1,float16,float16,0,0.2568906744321187
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.9050026734670004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.5552213191986084
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,1,1,128,1,float16,float16,0,0.43237332503000897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.5617173512776693
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,1,1,128,1,float16,float16,0,0.1597599983215332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.3315093318621318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,4096,1,1,128,1,float16,float16,0,0.45374401410420734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.9005333582560221
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,1,1,128,1,float16,float16,0,0.09178666273752849
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.3320159912109375
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.23458667596181235
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.23331733544667563
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,4096,1,1,128,1,float16,float16,0,0.09174933036168416
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,1,1,128,1,float16,float16,0,0.0846666693687439
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,4096,1,1,128,1,float16,float16,0,0.15923733512560526
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,4096,1,1,128,1,float16,float16,0,0.084714670976003
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.22726933161417642
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.22807466983795166
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,1,1,128,1,float16,float16,0,0.16224533319473267
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,1,1,128,1,float16,float16,0,0.2730026642481486
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,1,1,128,1,float16,float16,0,0.10547199845314026
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,3072,1,1,128,1,float16,float16,0,0.1644053359826406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.33162132898966473
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,3072,1,1,128,1,float16,float16,0,0.2688159942626953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.3325013319651286
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.5593119859695435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.565994660059611
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,3072,1,1,128,1,float16,float16,0,0.10616532961527507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.2837173342704773
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.2849973241488139
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,1,1,128,1,float16,float16,0,0.06820799907048543
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,1,1,128,1,float16,float16,0,0.06533866624037425
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.17645333210627237
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.17208532492319742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.1766186753908793
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,3072,1,1,128,1,float16,float16,0,0.06777599950631459
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.1731520096460978
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,3072,1,1,128,1,float16,float16,0,0.06588266789913177
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,1,1,128,1,float16,float16,0,0.26369067033131915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,1,1,128,1,float16,float16,0,0.1490720013777415
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.3072800040245056
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,2048,1,1,128,1,float16,float16,0,0.14829333623250326
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.502784013748169
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.5059946775436401
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,1,1,128,1,float16,float16,0,0.09732266267140706
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,2048,1,1,128,1,float16,float16,0,0.2730666597684224
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.15772799650828043
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,2048,1,1,128,1,float16,float16,0,0.0974666674931844
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.3099306623140971
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.15793599685033163
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.12610133488972983
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,1,1,128,1,float16,float16,0,0.04685866832733154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.12626666824022928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,2048,1,1,128,1,float16,float16,0,0.04725333551565806
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.120688001314799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,1,1,128,1,float16,float16,0,0.051216001311937966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,2048,1,1,128,1,float16,float16,0,0.05091733237107595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,1,1,128,1,float16,float16,0,0.045168002446492515
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.11999467015266418
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.11871467034022014
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,2048,1,1,128,1,float16,float16,0,0.045461331804593406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.11863999565442403
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,1,1,128,1,float16,float16,0,0.10101866722106934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.19614932934443155
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,1,1,128,1,float16,float16,0,0.1691733400026957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1536,1,1,128,1,float16,float16,0,0.10174399614334106
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1536,1,1,128,1,float16,float16,0,0.16809600591659546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.32202666997909546
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.31768532594045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.20318400859832764
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,1,1,128,1,float16,float16,0,0.058543999989827476
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1536,1,1,128,1,float16,float16,0,0.058890665570894875
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.153029332558314
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.09593600034713745
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,1,1,128,1,float16,float16,0,0.0376800000667572
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,1,1,128,1,float16,float16,0,0.03974399964014689
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1536,1,1,128,1,float16,float16,0,0.03985599925120672
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.09545066952705383
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.15106667081514993
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.09316800038019817
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1536,1,1,128,1,float16,float16,0,0.03728000074625015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,1,1,128,1,float16,float16,0,0.035573333501815796
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.09318400422732036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.09021332859992981
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.09075199564297994
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1536,1,1,128,1,float16,float16,0,0.035642666121323906
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,1,1,128,1,float16,float16,0,0.09743466973304749
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.1834133267402649
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1024,1,1,128,1,float16,float16,0,0.09834133585294087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.2982719937960307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,1,1,128,1,float16,float16,0,0.1824959913889567
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1024,1,1,128,1,float16,float16,0,0.17669866482416788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.30026666323343915
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,1,1,128,1,float16,float16,0,0.05523733297983805
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.11609066526095073
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.18577067057291666
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,1,1,128,1,float16,float16,0,0.03199466566244761
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.0705973356962204
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1024,1,1,128,1,float16,float16,0,0.05571199953556061
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.07051733136177063
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.11661866307258606
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.06673066814740498
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,1,1,128,1,float16,float16,0,0.028575999041398365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1024,1,1,128,1,float16,float16,0,0.03209066639343897
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.0668693333864212
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1024,1,1,128,1,float16,float16,0,0.028688001135985058
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,1,1,128,1,float16,float16,0,0.026704000929991405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.06477333108584087
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,1,1,128,1,float16,float16,0,0.025573333104451496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.06462400158246358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.0400693342089653
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.04019733270009359
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1024,1,1,128,1,float16,float16,0,0.026719999810059864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1024,1,1,128,1,float16,float16,0,0.025583999852339428
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,1,1,128,1,float16,float16,0,0.06677333513895671
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.12479999661445618
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.12411733468373616
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,512,1,1,128,1,float16,float16,0,0.06613333523273468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,1,1,128,1,float16,float16,0,0.1311306655406952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.19729600350062051
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,512,1,1,128,1,float16,float16,0,0.1267359952131907
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,1,1,128,1,float16,float16,0,0.0383146678407987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.19525333245595297
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.06609599788983662
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.06675733129183452
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,1,1,128,1,float16,float16,0,0.0223786657055219
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,512,1,1,128,1,float16,float16,0,0.03806400050719579
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,512,1,1,128,1,float16,float16,0,0.022634667654832203
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.04002666721741358
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,1,1,128,1,float16,float16,0,0.01915733392039935
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,512,1,1,128,1,float16,float16,0,0.019120000302791595
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.043525333205858864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.04350399971008301
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,1,1,128,1,float16,float16,0,0.017514667163292568
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.03999999910593033
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,512,1,1,128,1,float16,float16,0,0.01748266691962878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.03789866715669632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.03803733239571253
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.0269813338915507
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.027098665634791057
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,1,1,128,1,float16,float16,0,0.016303999970356624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,512,1,1,128,1,float16,float16,0,0.016352000335852306
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,1,1,128,1,float16,float16,0,0.016010666886965435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.018944000204404194
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.018816000471512478
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,512,1,1,128,1,float16,float16,0,0.016074666132529575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.04232533276081085
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,1,1,128,1,float16,float16,0,0.030282666285832722
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,256,1,1,128,1,float16,float16,0,0.030394665896892548
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,1,1,128,1,float16,float16,0,0.05248000224431356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,256,1,1,128,1,float16,float16,0,0.0525493323802948
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.0886293351650238
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.08770666519800822
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,1,1,128,1,float16,float16,0,0.017808000246683758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.0303413321574529
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.0424586683511734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,256,1,1,128,1,float16,float16,0,0.017808000246683758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,1,1,128,1,float16,float16,0,0.01444799949725469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.026714667677879333
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.026704000929991405
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.03010133405526479
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,256,1,1,128,1,float16,float16,0,0.01444799949725469
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.024570666253566742
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,1,1,128,1,float16,float16,0,0.01267733300725619
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,256,1,1,128,1,float16,float16,0,0.012762666990359625
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.02455466737349828
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.019930666933457058
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,1,1,128,1,float16,float16,0,0.0116799995303154
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.020207999895016353
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,1,1,128,1,float16,float16,0,0.011317333827416102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,256,1,1,128,1,float16,float16,0,0.011648000528415045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,256,1,1,128,1,float16,float16,0,0.011306667079528173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.019381333142518997
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,1,1,128,1,float16,float16,0,0.011157333850860596
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.01950399950146675
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,256,1,1,128,1,float16,float16,0,0.011114666859308878
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.018394666413466137
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.01848000039656957
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,1,1,128,1,float16,float16,0,0.015392000476519266
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,128,1,1,128,1,float16,float16,0,0.015498666713635126
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.023370665808518726
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,1,1,128,1,float16,float16,0,0.02608533451954524
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,128,1,1,128,1,float16,float16,0,0.02606933315594991
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.03532800078392029
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.03523733218510946
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.019920000185569126
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,1,1,128,1,float16,float16,0,0.011962667107582092
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.023413332800070446
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,128,1,1,128,1,float16,float16,0,0.011893333246310553
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.017893332988023758
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.01993600030740102
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,1,1,128,1,float16,float16,0,0.010309333602587381
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.017957333475351334
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,128,1,1,128,1,float16,float16,0,0.010298666854699453
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.01685333376129468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,1,1,128,1,float16,float16,0,0.009242666885256767
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.016832000265518825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,128,1,1,128,1,float16,float16,0,0.009114666531483332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,1,1,128,1,float16,float16,0,0.008837333569924036
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.01635733370979627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,128,1,1,128,1,float16,float16,0,0.008767999708652496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.016250666230916977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,1,1,128,1,float16,float16,0,0.008736000085870424
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,128,1,1,128,1,float16,float16,0,0.008592000231146812
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.016154666741689045
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.01607999950647354
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,1,1,128,1,float16,float16,0,0.008400000010927519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,128,1,1,128,1,float16,float16,0,0.008442666381597519
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.01569066693385442
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.015861333658297855
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.01598400001724561
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,1,1,128,1,float16,float16,0,0.011770666887362799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,64,1,1,128,1,float16,float16,0,0.01179733375708262
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,1,1,128,1,float16,float16,0,0.01479999969402949
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.016229332735141117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.019946667055288952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.0198186660806338
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,64,1,1,128,1,float16,float16,0,0.014831999937693277
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.014202666779359182
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,1,1,128,1,float16,float16,0,0.010128000130256018
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,64,1,1,128,1,float16,float16,0,0.010026666646202406
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.01422400027513504
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,1,1,128,1,float16,float16,0,0.009114666531483332
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,64,1,1,128,1,float16,float16,0,0.009082666908701261
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.012602667013804117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,1,1,128,1,float16,float16,0,0.00877333308259646
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,64,1,1,128,1,float16,float16,0,0.00879466657837232
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.012554666648308435
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.012240000069141388
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,1,1,128,1,float16,float16,0,0.008416000132759413
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,1,1,128,1,float16,float16,0,0.008218666538596153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.01240533341964086
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.012400000045696894
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,64,1,1,128,1,float16,float16,0,0.008506666868925095
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.012069333344697952
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,64,1,1,128,1,float16,float16,0,0.008362666393319765
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,1,1,128,1,float16,float16,0,0.008309333274761835
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.011839999506870905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.012128000458081564
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,64,1,1,128,1,float16,float16,0,0.008223999912540117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,1,1,128,1,float16,float16,0,0.009872000043590864
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,32,1,1,128,1,float16,float16,0,0.009866666669646898
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.01821333294113477
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,1,1,128,1,float16,float16,0,0.011498666057984034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.020128000527620316
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,32,1,1,128,1,float16,float16,0,0.01157333329319954
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.020303999384244282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,1,1,128,1,float16,float16,0,0.008752000207702318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.016832000265518825
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,32,1,1,128,1,float16,float16,0,0.008896000062425932
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,1,1,128,1,float16,float16,0,0.008565333361426989
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.018218666315078735
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,32,1,1,128,1,float16,float16,0,0.008474666625261307
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.01664000004529953
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.017210666090250015
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,1,1,128,1,float16,float16,0,0.00842666688064734
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,1,1,128,1,float16,float16,0,0.008021333565314611
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,32,1,1,128,1,float16,float16,0,0.008373333141207695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.016837333639462788
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.016810666769742966
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,32,1,1,128,1,float16,float16,0,0.008143999924262365
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.016805333395799
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,1,1,128,1,float16,float16,0,0.007861333588759104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,32,1,1,128,1,float16,float16,0,0.008031999692320824
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.016261332978804905
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.016751999656359356
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.01651200031240781
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,1,1,128,1,float16,float16,0,0.007834666719039282
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,32,1,1,128,1,float16,float16,0,0.008112000301480293
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.016016000260909397
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,1,1,128,1,float16,float16,0,0.008943999807039896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.016794666647911072
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,16,1,1,128,1,float16,float16,0,0.008816000074148178
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,1,1,128,1,float16,float16,0,0.009898666913310686
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.018320000420014065
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,16,1,1,128,1,float16,float16,0,0.009818666925032934
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.018543999642133713
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.016645333419243496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,1,1,128,1,float16,float16,0,0.008410666758815447
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.016303999970356624
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,16,1,1,128,1,float16,float16,0,0.008330666770537695
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,1,1,128,1,float16,float16,0,0.00814933329820633
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.016250666230916977
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,1,1,128,1,float16,float16,0,0.008127999802430471
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,16,1,1,128,1,float16,float16,0,0.008218666538596153
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.01685333376129468
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,16,1,1,128,1,float16,float16,0,0.008901333436369896
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,1,1,128,1,float16,float16,0,0.00784533346692721
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.016890666137139004
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.01695466662446658
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.016714667280515034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,16,1,1,128,1,float16,float16,0,0.007930666829148928
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.016730666160583496
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,1,1,128,1,float16,float16,0,0.007861333588759104
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.016714667280515034
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.01635733370979627
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,16,1,1,128,1,float16,float16,0,0.007743999982873599
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,1,1,128,1,float16,float16,0,0.007733333234985669
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.016458666572968166
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.01646399994691213
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,16,1,1,128,1,float16,float16,0,0.007754666730761528
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,1,1,128,1,float16,float16,0,0.007882666463653246
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,64,1,1,1,128,1,float16,float16,0,0.007871999715765318
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.011482667177915573
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,1,1,128,1,float16,float16,0,0.008026666939258575
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,128,1,1,1,128,1,float16,float16,0,0.008053333188096682
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.011525332927703857
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,1,1,128,1,float16,float16,0,0.007701333612203598
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,32,1,1,1,128,1,float16,float16,0,0.007781333600481351
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,1,1,128,1,float16,float16,0,0.007658666620651881
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.011306667079528173
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,16,1,1,1,128,1,float16,float16,0,0.007760000104705493
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,1,1,128,1,float16,float16,0,0.00761600024998188
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,1,1,128,1,float16,float16,0,0.007536000261704127
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,8,1,1,1,128,1,float16,float16,0,0.007631999750932057
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.010954666882753372
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,4,1,1,1,128,1,float16,float16,0,0.007642666498819987
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,1,1,128,1,float16,float16,0,0.007466666400432587
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.010533332824707031
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,2,1,1,1,128,1,float16,float16,0,0.007471999774376552
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.010634666929642359
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,1,1,128,1,float16,float16,0,0.007525333513816197
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flash_attn,1,1,1,1,128,1,float16,float16,0,0.007370666911204656
VLLM,0.12.0,NVIDIA L40S,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.010992000500361124
