framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,float16,0,30.38909912109375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,float16,0,16.92570622762044
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,fp8,0,22.343289693196613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,fp8,0,13.093755086263021
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,float16,0,15.433483123779297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,fp8,0,11.18002192179362
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,float16,0,15.695232391357422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,fp8,0,11.425242106119791
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,float16,0,15.858863830566406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,fp8,0,11.840890248616537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,fp8,0,11.813237508138021
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,float16,0,15.727621714274088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,fp8,0,22.789044698079426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,float16,0,7.933514912923177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,fp8,0,6.670960108439128
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,float16,0,30.23053741455078
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,float16,0,7.161322911580403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,float16,0,31.235689798990887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,fp8,0,5.675504048665364
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,float16,0,7.35867182413737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,fp8,0,5.866650899251302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,float16,0,7.423610687255859
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,fp8,0,5.9750722249348955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,float16,0,7.195066452026367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,float16,0,3.8520800272623696
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,fp8,0,6.44598388671875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,float16,0,3.494079907735189
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,fp8,0,3.6671625773111978
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,fp8,0,3.2207412719726562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,fp8,0,2.835119883219401
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,float16,0,3.6445013682047525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,fp8,0,2.9567254384358725
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,float16,0,3.617552121480306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,float16,0,3.5914185841878257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,fp8,0,2.9217812220255532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,float16,0,30.921610514322918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,fp8,0,23.10466766357422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,fp8,0,13.120816548665365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,fp8,0,23.46387227376302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,fp8,0,13.137563069661459
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,float16,0,17.857018788655598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,float16,0,17.983306884765625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,fp8,0,13.30080540974935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,float16,0,17.95384470621745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,float16,0,18.80730692545573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,float16,0,8.681813557942709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,fp8,0,6.91432507832845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,fp8,0,7.885642369588216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,fp8,0,13.600660959879557
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,fp8,0,7.308064142862956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,float16,0,8.012240091959635
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,float16,0,9.665829340616861
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,float16,0,8.489706675211588
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,float16,0,4.320783933003743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,fp8,0,7.202816009521484
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,fp8,0,6.724192301432292
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,fp8,0,3.6242294311523438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,fp8,0,4.009498596191406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,float16,0,8.806426366170248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,float16,0,4.582762718200684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,fp8,0,4.231637318929036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,float16,0,4.764917373657227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,fp8,0,3.3803841272989907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,float16,0,4.068613370259603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,float16,0,4.1723893483479815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,fp8,0,3.3593759536743164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,float16,0,1.9837120374043782
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,fp8,0,2.039306640625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,fp8,0,2.125802675882975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,float16,0,2.0315732955932617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,fp8,0,2.026106675465902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,float16,0,2.532693386077881
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,fp8,0,1.7461120287577312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,float16,0,2.1835999488830566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,float16,0,2.0859200159708657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,fp8,0,1.9825973510742188
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,fp8,0,9.124293645222982
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,fp8,0,9.05996831258138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,float16,0,12.270058949788412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,fp8,0,9.612661361694336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,float16,0,12.080820719401041
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,fp8,0,9.52726936340332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,float16,0,12.565706888834635
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,float16,0,13.001018524169922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,fp8,0,4.513776143391927
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,float16,0,5.576096216837565
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,fp8,0,4.558042526245117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,fp8,0,5.820453643798828
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,float16,0,6.057696024576823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,float16,0,6.807173411051433
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,float16,0,5.969674428304036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,fp8,0,5.833066940307617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,float16,0,3.319429397583008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,float16,0,6.089797337849935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,fp8,0,2.6748374303181968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,fp8,0,2.9587891896565757
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,fp8,0,5.0373226801554365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,float16,0,2.836847941080729
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,fp8,0,2.3092479705810547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,float16,0,3.2745278676350913
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,float16,0,1.4103786150614421
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,float16,0,3.3171412150065103
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,fp8,0,3.2877279917399087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,fp8,0,2.4133386611938477
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,float16,0,2.91265074412028
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,fp8,0,1.2102826436360676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,fp8,0,1.5510346094767253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,float16,0,1.6316320101420085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,float16,0,1.4950613975524902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,fp8,0,1.376032034556071
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,fp8,0,1.6117973327636719
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,float16,0,1.4692586263020833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,fp8,0,1.260602633158366
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,float16,0,1.6888000170389812
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,fp8,0,12.101156870524088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,fp8,0,12.379861195882162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,fp8,0,12.442532857259115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,float16,0,16.402549743652344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,float16,0,16.260677337646484
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,float16,0,16.519973754882812
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,float16,0,7.296288172403972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,fp8,0,6.068826675415039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,float16,0,16.497957865397137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,fp8,0,7.941984176635742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,float16,0,8.107152303059896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,fp8,0,6.176015853881836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,float16,0,9.316629409790039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,fp8,0,12.728453318277994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,fp8,0,6.258544286092122
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,fp8,0,2.9932638804117837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,float16,0,7.864277521769206
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,float16,0,3.8188854853312173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,float16,0,8.002581278483072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,fp8,0,4.022992134094238
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,float16,0,4.442149480183919
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,fp8,0,6.327338536580403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,fp8,0,3.0062081019083657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,float16,0,3.761882781982422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,float16,0,4.210591952006022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,float16,0,1.8498934110005696
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,fp8,0,1.8729440371195476
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,float16,0,4.056954701741536
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,fp8,0,3.2095041275024414
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,fp8,0,2.0492746035257974
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,fp8,0,1.5342933336893718
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,float16,0,1.9252479871114094
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,float16,0,2.237925370534261
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,float16,0,1.9321813583374023
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,float16,0,1.89466126759847
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,fp8,0,2.064197381337484
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,fp8,0,1.6165812810262044
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,fp8,0,0.8189386526743571
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,float16,0,0.9692693551381429
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,fp8,0,0.9214933713277181
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,fp8,0,0.85426131884257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,float16,0,0.9882613023122152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,fp8,0,3.5908374786376953
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,fp8,0,1.096239964167277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,float16,0,1.1448746522267659
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,fp8,0,0.8980106512705485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,float16,0,1.001461346944173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,float16,0,0.9419733683268229
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,fp8,0,6.884714762369792
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,fp8,0,6.947242736816406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,float16,0,9.118389129638672
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,fp8,0,7.430789311726888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,float16,0,9.56814956665039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,float16,0,9.798149108886719
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,float16,0,9.713658650716146
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,fp8,0,3.421130816141764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,float16,0,4.800234794616699
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,fp8,0,7.377274831136067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,fp8,0,4.020607948303223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,float16,0,4.284159978230794
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,fp8,0,5.028890609741211
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,float16,0,5.404133478800456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,fp8,0,3.766021410624186
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,fp8,0,3.7401866912841797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,float16,0,4.626645406087239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,float16,0,2.1538559595743814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,float16,0,2.1281280517578125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,float16,0,2.711615880330404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,fp8,0,1.7730719248453777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,fp8,0,1.9772000312805176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,float16,0,2.426901340484619
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,float16,0,2.2528586387634277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,float16,0,4.510805447896321
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,float16,0,1.0783147017161052
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,fp8,0,2.048895994822184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,fp8,0,1.0606186389923096
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,float16,0,1.0935839811960857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,fp8,0,1.0179626941680908
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,fp8,0,1.8214507102966309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,fp8,0,1.029029369354248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,float16,0,1.154410680135091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,float16,0,1.3543200492858887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,fp8,0,1.3135733604431152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,fp8,0,2.6137653986612954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,fp8,0,1.0116639931996663
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,float16,0,1.1463786760965984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,float16,0,0.5607839822769165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,float16,0,0.5720266501108805
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,fp8,0,0.4808906714121501
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,float16,0,0.5904906590779623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,fp8,0,0.6861173311869303
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,float16,0,0.7028053601582845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,fp8,0,0.48815464973449707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,fp8,0,0.5290240049362183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,float16,0,0.5968960126241049
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,fp8,0,6.499909083048503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,fp8,0,0.529199997584025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,fp8,0,6.675285339355469
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,float16,0,8.699904123942057
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,fp8,0,7.432165145874023
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,float16,0,8.607685089111328
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,float16,0,9.36734390258789
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,fp8,0,3.2400852839152017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,float16,0,4.471994717915853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,float16,0,4.024095853169759
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,fp8,0,4.0430558522542315
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,fp8,0,7.253744125366211
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,float16,0,5.548378626505534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,float16,0,8.978282928466797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,fp8,0,5.373002370198567
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,float16,0,4.446325302124023
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,fp8,0,4.46232541402181
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,float16,0,4.445589383443196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,fp8,0,3.7707627614339194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,float16,0,1.9942347208658855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,float16,0,2.0821332931518555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,fp8,0,1.7064587275187175
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,float16,0,2.234069347381592
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,float16,0,2.777344067891439
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,fp8,0,2.7336533864339194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,float16,0,2.1853013038635254
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,fp8,0,2.2036852836608887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,fp8,0,0.8906026681264242
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,float16,0,1.01526935895284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,float16,0,1.044816017150879
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,fp8,0,1.6385386784871419
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,fp8,0,0.8420960108439127
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,float16,0,1.386672019958496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,fp8,0,0.9835733572642008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,float16,0,1.1049226919809978
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,float16,0,0.5190879901250204
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,fp8,0,0.9608426888783773
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,fp8,0,1.8500587145487468
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,fp8,0,0.453818678855896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,float16,0,0.5259360074996948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,fp8,0,0.7143999735514323
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,float16,0,0.5570773283640543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,float16,0,0.710863987604777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,float16,0,0.570026675860087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,fp8,0,0.5155466794967651
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,float16,0,0.2776533365249634
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,fp8,0,0.24909865856170654
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,fp8,0,0.38226668039957684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,fp8,0,0.2523039976755778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,float16,0,0.3916906515757243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,float16,0,0.3001599907875061
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,fp8,0,1.3877280553181965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,float16,0,1.1129973729451497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,fp8,0,0.27720000346501666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,float16,0,0.3005066712697347
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,fp8,0,0.46067198117574054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,fp8,0,0.5109013319015503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,fp8,0,3.8675254185994468
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,fp8,0,3.9932692845662436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,float16,0,4.7256425221761065
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,float16,0,0.2816426753997803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,float16,0,4.857093175252278
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,fp8,0,0.2771466573079427
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,float16,0,5.309136072794597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,fp8,0,4.482181231180827
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,float16,0,2.5205440521240234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,fp8,0,1.9524213473002117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,fp8,0,4.472991943359375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,fp8,0,2.0206185976664224
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,float16,0,2.481930732727051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,float16,0,5.329903920491536
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,fp8,0,3.5643412272135415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,float16,0,2.6567893028259277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,float16,0,3.5222400029500327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,fp8,0,2.3501760164896646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,float16,0,2.590160051981608
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,fp8,0,2.3123146692911782
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,float16,0,1.1970400015513103
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,fp8,0,1.0249919891357422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,fp8,0,1.029685338338216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,float16,0,1.2112267017364502
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,fp8,0,1.1677760283152263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,float16,0,1.3199573357899983
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,float16,0,1.7790452639261882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,fp8,0,1.180560032526652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,float16,0,1.3229119777679443
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,fp8,0,0.529210646947225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,float16,0,0.6088533401489258
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,fp8,0,0.5404160022735596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,float16,0,0.6446826855341593
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,fp8,0,0.6010560194651285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,float16,0,0.8920106887817383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,fp8,0,0.917136033376058
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,fp8,0,0.6056906779607137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,float16,0,0.6745493412017822
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,fp8,0,0.2816106677055359
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,float16,0,0.314517339070638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,float16,0,0.32371199131011963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,fp8,0,0.2908533414204915
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,float16,0,0.35015467802683514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,float16,0,0.4610613187154134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,fp8,0,0.32657066980997723
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,float16,0,0.3460799853006999
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,fp8,0,1.823962688446045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,float16,0,0.17402132352193198
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,fp8,0,0.15838399529457092
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,float16,0,0.6785066922505697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,float16,0,0.17715734243392944
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,float16,0,0.27261332670847577
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,fp8,0,0.16150933504104614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,float16,0,0.19026132424672446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,fp8,0,0.26553599039713544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,fp8,0,0.1737013260523478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,fp8,0,0.17715734243392944
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,float16,0,0.19338667392730713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,fp8,0,0.328874667485555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,fp8,0,0.47913066546122235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,fp8,0,3.829573313395182
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,float16,0,4.792746543884277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,fp8,0,4.006885210673015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,float16,0,4.824277242024739
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,float16,0,5.573317209879558
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,fp8,0,4.544986724853516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,float16,0,2.3374932607014975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,float16,0,5.4695892333984375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,float16,0,2.3918347358703613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,fp8,0,2.122709274291992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,fp8,0,4.643871943155925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,float16,0,2.7414986292521157
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,float16,0,3.8624267578125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,fp8,0,4.098106702168782
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,fp8,0,2.421269257863363
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,float16,0,2.7629067103068032
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,float16,0,1.1579413414001465
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,fp8,0,0.9834720293680826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,float16,0,1.1914880275726318
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,fp8,0,1.040005366007487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,float16,0,1.9224586486816406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,fp8,0,1.2252853711446126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,float16,0,1.3468906084696453
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,fp8,0,2.0711466471354165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,float16,0,1.319439967473348
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,float16,0,0.585973342259725
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,fp8,0,2.021519978841146
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,fp8,0,2.618213335673014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,fp8,0,0.5075146754582723
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,float16,0,0.601301352183024
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,fp8,0,0.5275040070215861
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,fp8,0,1.2582133611043294
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,fp8,0,0.6405440171559652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,fp8,0,1.0408746401468914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,float16,0,0.30557332436243695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,fp8,0,0.6123199860254923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,float16,0,0.6828587055206299
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,float16,0,0.690778652826945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,float16,0,0.9680426915486654
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,fp8,0,0.27287999788920086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,float16,0,0.3531839847564697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,float16,0,0.314736008644104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,fp8,0,0.33579734961191815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,fp8,0,0.2823733290036519
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,float16,0,0.4980800151824951
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,float16,0,0.1662826637427012
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,fp8,0,0.15129599968592325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,fp8,0,0.15760533014933267
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,float16,0,0.1780959963798523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,float16,0,0.1900906761487325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,fp8,0,0.28497066100438434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,float16,0,0.19006399313608804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,fp8,0,0.18371200561523438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,float16,0,0.09586133559544881
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,fp8,0,0.08876799543698628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,float16,0,0.09854400157928467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,float16,0,0.16090133786201477
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,fp8,0,0.16591466466585794
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,fp8,0,0.0904266635576884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,fp8,0,0.099973330895106
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,float16,0,0.10481066505114238
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,fp8,0,0.5343786478042603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,fp8,0,0.34011733531951904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,float16,0,0.35554667313893634
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,fp8,0,0.09938133756319682
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,float16,0,0.10623466968536377
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,fp8,0,0.17861332496007284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,float16,0,0.2912000020345052
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,float16,0,2.8434826532999673
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,fp8,0,2.5309227307637534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,fp8,0,2.389946619669596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,fp8,0,2.9699201583862305
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,float16,0,2.960229237874349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,float16,0,1.4349546432495117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,fp8,0,1.2091946601867676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,float16,0,3.3869333267211914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,float16,0,1.541983922322591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,fp8,0,1.278549353281657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,float16,0,1.7532480557759602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,fp8,0,1.5280052820841472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,fp8,0,2.8066507975260415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,float16,0,2.5981225967407227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,float16,0,3.449082692464193
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,float16,0,1.7111573219299316
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,float16,0,0.7411519686381022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,fp8,0,0.6199680169423422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,fp8,0,3.046293258666992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,float16,0,0.741978645324707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,fp8,0,1.4166399637858074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,float16,0,0.856714646021525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,fp8,0,0.8001493612925211
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,float16,0,0.8393226464589437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,fp8,0,0.8218026955922445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,float16,0,0.3632800181706746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,float16,0,0.7004640102386475
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,fp8,0,0.3261173367500305
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,fp8,0,1.5745333035786946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,fp8,0,0.7203359603881836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,fp8,0,0.349509318669637
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,fp8,0,0.40863998730977374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,float16,0,0.44192532698313397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,float16,0,1.3092959721883137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,fp8,0,0.40969598293304443
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,fp8,0,0.17745065689086914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,float16,0,0.20109866062800089
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,float16,0,0.20268267393112183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,float16,0,0.2330133318901062
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,float16,0,0.3413333495457967
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,fp8,0,0.18691732486089072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,fp8,0,0.22140800952911377
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,float16,0,0.3766560157140096
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,float16,0,0.23281067609786987
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,fp8,0,0.2274186611175537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,fp8,0,0.10074133674303691
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,float16,0,0.4379146496454875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,float16,0,0.11316800117492676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,float16,0,0.20547733704249063
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,fp8,0,0.10525866349538167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,fp8,0,0.655290683110555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,float16,0,0.12899733583132425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,fp8,0,0.20269334316253662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,fp8,0,0.11817600329717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,fp8,0,0.11819733182589214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,float16,0,0.13148799538612366
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,float16,0,0.0668639987707138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,float16,0,0.0682666649421056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,fp8,0,0.06365333497524261
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,float16,0,0.0722453345855077
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,fp8,0,0.37371734778086346
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,fp8,0,0.06936533252398173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,fp8,0,0.11409599582354228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,fp8,0,0.06990399956703186
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,float16,0,0.11757866541544597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,float16,0,0.11365333199501038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,float16,0,3.0071519215901694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,fp8,0,2.525482654571533
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,fp8,0,0.06294933458169301
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,fp8,0,2.7059574127197266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,float16,0,3.0907465616861978
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,float16,0,3.792330741882324
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,float16,0,0.07351466516653697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,fp8,0,3.3824853897094727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,float16,0,1.486464023590088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,float16,0,3.715221405029297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,float16,0,1.5316799481709797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,fp8,0,3.4809226989746094
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,fp8,0,1.294320027033488
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,float16,0,1.88265593846639
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,float16,0,3.0531253814697266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,fp8,0,1.362768014272054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,fp8,0,1.793733278910319
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,float16,0,1.895055929819743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,fp8,0,3.387749354044596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,fp8,0,1.6983307202657063
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,float16,0,0.78548796971639
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,fp8,0,0.6483999888102213
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,float16,0,1.52510404586792
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,fp8,0,0.6986186504364014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,float16,0,0.9356586933135986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,fp8,0,0.8462666670481364
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,float16,0,0.9643946488698324
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,float16,0,0.7698826789855957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,fp8,0,0.918842633565267
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,float16,0,0.38074668248494464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,fp8,0,0.3392266829808553
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,fp8,0,1.7112107276916504
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,float16,0,0.3999093373616536
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,float16,0,0.7677546342213949
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,fp8,0,0.44838400681813556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,fp8,0,0.3587733507156372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,float16,0,0.4785439968109131
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,fp8,0,0.44386665026346844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,float16,0,0.47654398282368976
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,fp8,0,0.8659093379974365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,fp8,0,0.18451199928919473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,fp8,0,0.19379733006159464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,float16,0,0.2114880084991455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,float16,0,0.24982933203379312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,fp8,0,0.24290132522583008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,float16,0,0.24761066834131876
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,fp8,0,0.249616007010142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,fp8,0,0.44549866517384845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,float16,0,0.11357333262761433
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,float16,0,0.3938026825586955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,fp8,0,0.10034666458765666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,fp8,0,0.10626133282979329
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,fp8,0,0.12523200114568075
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,float16,0,0.13689600427945456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,fp8,0,0.13387733697891235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,float16,0,0.20770132541656494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,fp8,0,0.23413334290186563
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,float16,0,0.06597333153088887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,float16,0,0.2037066618601481
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,fp8,0,0.059903999169667564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,fp8,0,0.061749334136645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,fp8,0,0.07008000214894612
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,float16,0,0.12929067015647888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,fp8,0,0.13451199730237326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,float16,0,0.0744053324063619
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,float16,0,0.11874666810035706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,float16,0,0.04199466605981191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,float16,0,0.043578664461771645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,fp8,0,0.038736000657081604
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,fp8,0,0.06624533236026764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,float16,0,0.13690666357676187
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,fp8,0,0.03965333352486292
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,float16,0,0.07111999889214833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,float16,0,0.045893331368764244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,fp8,0,0.043925335009892784
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,float16,0,0.046122665206591286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,float16,0,0.06698666512966156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,fp8,0,0.07098133365313213
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,float16,0,2.2002080281575522
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,fp8,0,1.8813440004984539
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,float16,0,2.2924319903055825
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,fp8,0,2.0615359942118325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,fp8,0,0.0440586656332016
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,float16,0,2.954282760620117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,float16,0,0.07262933254241943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,float16,0,1.0856160322825115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,fp8,0,2.8926881154378257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,fp8,0,0.9510719776153564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,float16,0,2.8848854700724282
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,fp8,0,1.0437333583831787
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,fp8,0,2.677349408467611
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,float16,0,1.1709813276926677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,float16,0,2.624901294708252
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,float16,0,1.4649653434753418
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,fp8,0,3.0466718673706055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,fp8,0,1.3154613176981609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,float16,0,0.5515093406041464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,float16,0,0.6011840105056763
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,fp8,0,0.4837599992752075
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,fp8,0,1.4647040367126465
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,float16,0,1.4835093816121419
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,fp8,0,0.6819039980570475
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,float16,0,0.7520373662312826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,fp8,0,0.5325440168380737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,fp8,0,0.6982186635335287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,fp8,0,1.538058598836263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,float16,0,0.7239627043406168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,float16,0,0.29334400097529095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,fp8,0,0.25358400742212933
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,fp8,0,0.28222399950027466
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,fp8,0,0.3633439938227336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,float16,0,0.3111306627591451
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,float16,0,0.39618666966756183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,fp8,0,0.37353599071502686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,fp8,0,0.776634693145752
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,float16,0,1.3242719968159993
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,float16,0,0.6716000239054362
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,float16,0,0.17116800944010416
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,fp8,0,0.14050133029619852
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,fp8,0,0.19426665703455606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,float16,0,0.20716800292332968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,fp8,0,0.3988800048828125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,fp8,0,0.15550399820009866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,float16,0,0.20436267058054605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,fp8,0,0.20377600193023682
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,float16,0,0.08916266759236653
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,fp8,0,0.07739733159542084
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,float16,0,0.0962720016638438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,fp8,0,0.082997332016627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,float16,0,0.11292800307273865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,fp8,0,0.09947733084360759
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,float16,0,0.18231467405954996
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,float16,0,0.39977598190307617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,fp8,0,0.1049173374970754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,float16,0,0.11107200384140015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,float16,0,0.1590079963207245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,fp8,0,0.045647998650868736
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,float16,0,0.05307200054327647
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,float16,0,0.05153599878152212
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,fp8,0,0.11329600214958191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,fp8,0,0.05506666501363119
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,float16,0,0.05795733133951823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,float16,0,0.10072533289591472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,float16,0,0.3441760142644246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,fp8,0,0.056330665946006775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,float16,0,0.060005332032839455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,float16,0,0.03198933353026708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,fp8,0,0.029546665648619335
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,fp8,0,0.057392001152038574
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,fp8,0,0.03161599983771642
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,float16,0,0.062496001521746315
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,fp8,0,0.20986666282018027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,float16,0,0.03588266670703888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,fp8,0,0.03570666660865148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,float16,0,0.021338666478792827
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,fp8,0,0.04704533517360687
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,fp8,0,0.03463999927043915
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,float16,0,0.027376001079877216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,fp8,0,0.019530666371186573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,float16,0,0.021557333568731945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,fp8,0,0.021344001094500225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,float16,0,0.03386666625738144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,float16,0,0.021173333128293354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,fp8,0,0.03492266684770584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,float16,0,0.03588266670703888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,fp8,0,0.021365332106749218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,fp8,0,0.01931200052301089
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,float16,0,0.02070933332045873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,float16,0,0.8335466384887695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,fp8,0,0.7559093634287516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,fp8,0,0.8504479726155599
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,float16,0,0.898751974105835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,float16,0,1.2224746545155842
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,fp8,0,1.1426666577657063
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,float16,0,0.4249386787414551
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,fp8,0,0.38654398918151855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,float16,0,0.4684640169143677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,float16,0,1.270133336385091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,fp8,0,0.4331520001093547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,fp8,0,1.167866627375285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,fp8,0,1.4315199851989746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,float16,0,0.6242986520131429
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,fp8,0,0.5814666748046875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,float16,0,0.233130673567454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,float16,0,0.6156373421351115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,fp8,0,0.2039733330408732
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,float16,0,0.24207999308904013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,fp8,0,0.22556267182032266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,fp8,0,0.5915946563084921
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,float16,0,0.6069066524505615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,fp8,0,0.3088853359222412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,float16,0,0.33081599076588947
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,float16,0,0.3099679946899414
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,fp8,0,0.3212266763051351
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,float16,0,0.12186132868131001
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,fp8,0,0.7259626388549805
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,float16,0,1.1993813514709473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,fp8,0,0.11266666650772095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,fp8,0,0.3720266819000244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,float16,0,0.16831467549006143
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,float16,0,0.1313866674900055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,fp8,0,0.16690133015314737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,float16,0,0.0683786670366923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,fp8,0,0.12148800492286682
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,float16,0,0.17509865760803223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,fp8,0,0.175653338432312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,float16,0,0.193615992863973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,fp8,0,0.05996799965699514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,fp8,0,0.06622933348019917
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,float16,0,0.31360000371932983
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,float16,0,0.09103467067082723
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,fp8,0,0.08801066875457764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,float16,0,0.09122666716575623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,fp8,0,0.08925867080688477
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,float16,0,0.04021333406368891
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,fp8,0,0.10545600454012553
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,float16,0,0.042026668787002563
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,fp8,0,0.03957866628964742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,float16,0,0.04794666667779287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,float16,0,0.10472533106803894
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,fp8,0,0.048058668772379555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,fp8,0,0.04816000163555145
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,float16,0,0.025216000775496166
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,float16,0,0.025589334468046825
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,fp8,0,0.025461333493391674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,float16,0,0.0745119998852412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,float16,0,0.04943466683228811
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,fp8,0,0.029482667644818623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,float16,0,0.029253333806991577
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,fp8,0,0.19350934028625488
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,float16,0,0.029557332396507263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,fp8,0,0.03745600084463755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,fp8,0,0.017162666966517765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,float16,0,0.017456000049908955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,float16,0,0.017210666090250015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,float16,0,0.0498933345079422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,fp8,0,0.03152533372243246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,float16,0,0.023562667270501454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,float16,0,0.01922133316596349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,fp8,0,0.023157333334287006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,float16,0,0.019120000302791595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,fp8,0,0.019248000035683315
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,float16,0,0.015450666348139444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,fp8,0,0.05147733290990194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,fp8,0,0.015189333508412043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,fp8,0,0.01545599972208341
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,float16,0,0.01904533306757609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,fp8,0,0.029391999046007793
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,float16,0,0.017162666966517765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,fp8,0,0.015354666858911514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,fp8,0,0.016837333639462788
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,fp8,0,0.016986666868130367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,fp8,0,0.01934933289885521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,float16,0,0.5197759866714478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,fp8,0,0.4768426815668742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,float16,0,0.5604000091552734
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,float16,0,0.015392000476519266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,fp8,0,0.023333333432674408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,float16,0,0.01573866605758667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,fp8,0,0.5264159838358561
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,float16,0,0.7079253196716309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,fp8,0,0.7326080004374186
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,float16,0,0.2693386673927307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,float16,0,0.7029120127360026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,fp8,0,0.24685333172480264
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,float16,0,0.2903413375218709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,fp8,0,0.26863465706507367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,float16,0,0.6883520285288492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,fp8,0,0.7669333616892496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,float16,0,0.377072016398112
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,fp8,0,0.37747732798258465
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,fp8,0,0.34959999720255536
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,float16,0,0.380570650100708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,fp8,0,0.13294933239618936
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,float16,0,0.1463520030180613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,fp8,0,0.3911573489507039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,fp8,0,0.1421440045038859
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,fp8,0,0.18610133727391562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,float16,0,0.3319573402404785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,fp8,0,0.6902399857838949
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,float16,0,0.0823466678460439
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,fp8,0,0.18529067436854044
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,float16,0,0.19872534275054932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,fp8,0,0.07230933507283528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,float16,0,0.08747200171152751
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,float16,0,0.1729066570599874
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,fp8,0,0.07923733194669087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,float16,0,0.10408533612887065
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,fp8,0,0.20290666818618774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,float16,0,0.10354666908582051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,fp8,0,0.09925867120424907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,float16,0,0.04580800235271454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,float16,0,0.15571199854214987
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,fp8,0,0.041797334949175514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,float16,0,0.1930933396021525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,fp8,0,0.04993600149949392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,float16,0,0.09301867087682088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,float16,0,0.0461760014295578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,fp8,0,0.10804800192515056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,float16,0,0.05603733162085215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,float16,0,0.02808533360560735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,fp8,0,0.05119466781616211
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,fp8,0,0.025461333493391674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,float16,0,0.02938133229811986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,fp8,0,0.027386667827765148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,float16,0,0.031557333966096245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,fp8,0,0.03151999910672506
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,float16,0,0.03245333333810171
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,fp8,0,0.09835199515024821
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,fp8,0,0.03175999969244003
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,fp8,0,0.05286933481693268
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,float16,0,0.01739199956258138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,fp8,0,0.017301333447297413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,float16,0,0.01905599981546402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,fp8,0,0.039690665900707245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,float16,0,0.01929066702723503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,float16,0,0.01915733392039935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,fp8,0,0.03257066756486893
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,float16,0,0.02568000058333079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,fp8,0,0.019834666202465694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,float16,0,0.01332266628742218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,float16,0,0.052144000927607216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,fp8,0,0.01332266628742218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,fp8,0,0.019461333751678467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,fp8,0,0.013237333546082178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,float16,0,0.01488000030318896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,float16,0,0.017162666966517765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,float16,0,0.05876799921194712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,float16,0,0.015279999623696009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,float16,0,0.01309866706530253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,fp8,0,0.013199999928474426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,fp8,0,0.019167999426523846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,float16,0,0.013530666629473368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,float16,0,0.013327999661366144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,fp8,0,0.3928320010503133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,float16,0,0.5498239994049072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,float16,0,0.4724160035451253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,fp8,0,0.5009066661198934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,fp8,0,0.4205333391825358
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,float16,0,0.5438933372497559
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,float16,0,0.23875733216603598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,fp8,0,0.4654666582743327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,fp8,0,0.2064746618270874
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,float16,0,0.2512693405151367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,float16,0,0.4639093478520711
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,float16,0,0.28378132979075116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,fp8,0,0.49766401449839276
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,float16,0,0.281551996866862
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,fp8,0,0.2171893318494161
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,fp8,0,0.263973335425059
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,fp8,0,0.24107199907302856
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,fp8,0,0.11028800408045451
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,fp8,0,0.26313600937525433
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,float16,0,0.13580800096193948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,fp8,0,0.116757333278656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,float16,0,0.12864533066749573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,float16,0,0.21924267212549844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,fp8,0,0.13911466797192892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,fp8,0,0.138565331697464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,float16,0,0.15040000279744467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,fp8,0,0.060218666990598045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,float16,0,0.15134933590888977
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,float16,0,0.07268266876538594
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,float16,0,0.4500906864802043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,float16,0,0.12602667013804117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,float16,0,0.0778186668952306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,fp8,0,0.1262079974015554
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,float16,0,0.08121599753697713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,fp8,0,0.07021866738796234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,float16,0,0.0705866664648056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,float16,0,0.04167466859022776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,fp8,0,0.034815999368826546
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,fp8,0,0.035936000446478523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,fp8,0,0.06250666578610738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,fp8,0,0.0710453341404597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,float16,0,0.039962666730086006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,float16,0,0.04288533329963684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,float16,0,0.06483733157316844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,float16,0,0.043807998299598694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,float16,0,0.025392000873883564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,fp8,0,0.023344000180562336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,float16,0,0.025386666258176167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,fp8,0,0.025397333006064098
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,float16,0,0.02609066665172577
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,fp8,0,0.037658666570981346
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,float16,0,0.031856000423431396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,float16,0,0.026799999177455902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,fp8,0,0.025477332373460133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,float16,0,0.017077332983414333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,fp8,0,0.016927999754746754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,fp8,0,0.06106133262316386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,float16,0,0.017231999586025875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,fp8,0,0.01714133347074191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,fp8,0,0.023242667317390442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,fp8,0,0.017237332959969837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,float16,0,0.01735466718673706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,float16,0,0.017370666066805523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,fp8,0,0.017317333569129307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,float16,0,0.02117866774400075
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,fp8,0,0.03993066648642222
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,float16,0,0.013269333789745966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,fp8,0,0.0406986673672994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,float16,0,0.013258667041858038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,fp8,0,0.023386667172114056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,float16,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,float16,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,float16,0,0.012847999731699625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,float16,0,0.012874666601419449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,float16,0,0.015301333119471868
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,float16,0,0.01313599944114685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,float16,0,0.43221867084503174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,float16,0,0.4333759943644206
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,fp8,0,0.37096532185872394
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,fp8,0,0.3587626616160075
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,float16,0,0.01515199989080429
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,float16,0,0.4818613529205322
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,fp8,0,0.18617600202560425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,float16,0,0.4692106644312541
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,fp8,0,0.4129120111465454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,float16,0,0.22950400908788046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,float16,0,0.23534399271011353
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,fp8,0,0.19197332859039307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,float16,0,0.3139413396517436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,fp8,0,0.3158666690190633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,float16,0,0.24657599131266275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,fp8,0,0.21434134244918823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,fp8,0,0.21281067530314127
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,float16,0,0.24942932526270548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,float16,0,0.11831999818483989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,fp8,0,0.09779199957847595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,fp8,0,0.41250133514404297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,float16,0,0.12276267011960347
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,float16,0,0.129120002190272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,fp8,0,0.10753066341082256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,fp8,0,0.10037333766619365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,fp8,0,0.10846400260925293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,float16,0,0.16586666305859885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,fp8,0,0.16473600268363953
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,float16,0,0.13288533687591553
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,float16,0,0.06663466493288676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,fp8,0,0.05421333511670431
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,fp8,0,0.06042666733264923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,fp8,0,0.09020800391832988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,float16,0,0.0708000014225642
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,float16,0,0.06853866577148438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,fp8,0,0.05533866584300995
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,fp8,0,0.0602400004863739
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,fp8,0,0.031717332700888314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,float16,0,0.037845333417256675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,float16,0,0.03818133225043615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,fp8,0,0.031530665854612984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,float16,0,0.03982933362325033
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,fp8,0,0.03389333436886469
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,fp8,0,0.046122665206591286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,float16,0,0.03975466638803482
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,fp8,0,0.03581333408753077
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,float16,0,0.0444213350613912
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,fp8,0,0.022122666239738464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,float16,0,0.023541333774725597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,fp8,0,0.0216799999276797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,float16,0,0.024271999796231587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,float16,0,0.09159466624259949
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,fp8,0,0.023168000082174938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,float16,0,0.0236160010099411
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,fp8,0,0.02903999884923299
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,float16,0,0.06650133430957794
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,float16,0,0.024933333198229473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,fp8,0,0.015013333410024643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,float16,0,0.01711999997496605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,fp8,0,0.01533866673707962
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,fp8,0,0.015360000232855478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,float16,0,0.016682667036851246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,float16,0,0.017237332959969837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,fp8,0,0.01953599974513054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,fp8,0,0.01603200038274129
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,float16,0,0.019237333287795384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,float16,0,0.01313599944114685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,float16,0,0.013210666676362356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,float16,0,0.027509334186712902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,float16,0,0.01704000060757001
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,fp8,0,0.023050665855407715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,float16,0,0.013306666165590286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,float16,0,0.013450667262077332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,fp8,0,0.013253333667914072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,fp8,0,0.01701333373785019
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,float16,0,0.013045333325862885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,float16,0,0.41687464714050293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,float16,0,0.015029333531856537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,float16,0,0.42481064796447754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,fp8,0,0.3470880190531413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,float16,0,0.4407680034637451
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,fp8,0,0.36527466773986816
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,float16,0,0.44063464800516766
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,fp8,0,0.3643466631571452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,float16,0,0.21716266870498657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,float16,0,0.27436800797780353
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,fp8,0,0.17405333121617636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,float16,0,0.22103466590245566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,fp8,0,0.17620799938837686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,float16,0,0.22873600323994955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,fp8,0,0.34025601545969647
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,fp8,0,0.19015467166900635
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,float16,0,0.2309760053952535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,float16,0,0.11638933420181274
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,float16,0,0.11738133430480957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,fp8,0,0.18446399768193564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,fp8,0,0.12974400321642557
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,fp8,0,0.09865599870681763
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,float16,0,0.14686933159828186
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,float16,0,0.11711466312408447
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,float16,0,0.12014933427174886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,fp8,0,0.09898666540781657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,float16,0,0.06471466521422069
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,fp8,0,0.05223466455936432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,float16,0,0.06413333117961884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,fp8,0,0.05293866495291392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,fp8,0,0.06761066615581512
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,float16,0,0.06630933284759521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,float16,0,0.07648533085982005
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,fp8,0,0.054234668612480164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,float16,0,0.06492800017197926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,fp8,0,0.054234668612480164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,fp8,0,0.09428800145785014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,fp8,0,0.032074667513370514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,fp8,0,0.09387200077374776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,float16,0,0.03788266579310099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,fp8,0,0.032111999889214836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,float16,0,0.03757333258787791
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,float16,0,0.037690666814645134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,fp8,0,0.032511999209721885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,float16,0,0.03965866565704346
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,fp8,0,0.03159466634194056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,float16,0,0.03760000069936117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,float16,0,0.023317334552605946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,float16,0,0.023221333821614582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,fp8,0,0.021253332495689392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,float16,0,0.025946666797002155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,float16,0,0.023232000569502514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,fp8,0,0.24260266621907553
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,fp8,0,0.021226666867733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,float16,0,0.023397333920001984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,fp8,0,0.021498667697111767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,fp8,0,0.015365333606799444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,float16,0,0.01700266698996226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,float16,0,0.01716800034046173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,fp8,0,0.015429332852363586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,fp8,0,0.037589333951473236
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,float16,0,0.01741333305835724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,fp8,0,0.015168000012636185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,fp8,0,0.015333333363135656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,float16,0,0.01711999997496605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,fp8,0,0.021157334248224895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,float16,0,0.01302933320403099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,float16,0,0.019173332800467808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,float16,0,0.012896000097195307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,float16,0,0.012986666212479273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,fp8,0,0.026687999566396076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,float16,0,0.015109332899252573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,float16,0,0.011855999628702799
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,fp8,0,0.017082666357358296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,float16,0,0.012191999703645706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,float16,0,0.011440000186363855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,float16,0,0.012885333349307379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,float16,0,0.013349333157142004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,fp8,0,0.01951466624935468
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,0,0.023472001155217487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,fp8,0,0.01602666700879733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,0,0.02940800040960312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,0,0.025631998976071674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,fp8,0,0.08082666496435802
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,float16,0,0.1034986674785614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,fp8,0,0.045968001087506614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,0,0.013130666067202887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,fp8,0,0.012186666329701742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,0,0.012351999680201212
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,float16,0,0.060005332032839455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,0,0.014970666418472925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,0,0.013242666920026144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,0,0.01099733387430509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,float16,0,0.03139200061559677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,0,0.01544533297419548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,0,0.019189332922299702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,0,0.011018666128317514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,fp8,0,0.02128533273935318
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,0,0.01098666712641716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,0,0.009632000078757605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,0,0.011514666179815928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,0,0.009029333169261614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,fp8,0,0.02978666623433431
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,0,0.009695999945203463
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,0,0.009599999835093817
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,0,0.00949866697192192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,0,0.009514666472872099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,float16,0,0.021242665747801464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,0,0.009328000247478485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,0,0.00914666677514712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,float16,0,0.013157332936922709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,float16,0,0.017136000096797943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,fp8,0,0.015013333410024643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,float16,0,0.01146666705608368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,fp8,0,0.01403733342885971
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,0,0.008933333059151968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,float16,0,0.012938667088747025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,0,0.00915733352303505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,0,0.009818666925032934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,0,0.009226666763424873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,0,0.009194666519761086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,fp8,0,17.10975519816081
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,fp8,0,17.217851003011067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,float16,0,22.89259084065755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,float16,0,22.867050170898438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,fp8,0,17.773258209228516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,float16,0,23.572469075520832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,float16,0,12.419813791910807
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,fp8,0,9.958752314249674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,float16,0,11.781519571940104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,fp8,0,17.519461313883465
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,float16,0,23.789098103841145
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,fp8,0,8.923248291015625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,float16,0,11.480901082356771
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,fp8,0,8.744858423868815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,fp8,0,5.06603209177653
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,fp8,0,8.871135711669922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,float16,0,5.4400482177734375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,float16,0,6.113242467244466
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,float16,0,11.515637715657553
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,fp8,0,4.310405413309733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,float16,0,12.12771224975586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,float16,0,5.41047477722168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,fp8,0,4.2642825444539385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,fp8,0,4.68773873647054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,float16,0,6.120778401692708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,fp8,0,4.528858820597331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,float16,0,2.6351946194966636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,float16,0,3.029850641886393
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,fp8,0,8.810703913370768
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,float16,0,6.451674779256185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,fp8,0,2.964602788289388
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,fp8,0,2.1974612871805825
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,fp8,0,2.1872000694274902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,float16,0,2.807978630065918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,float16,0,2.796346664428711
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,fp8,0,2.7885119120279946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,float16,0,2.765813191731771
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,fp8,0,2.9149866104125977
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,fp8,0,9.861029307047525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,fp8,0,9.944197336832682
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,float16,0,13.206432342529297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,fp8,0,10.318133036295572
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,fp8,0,10.33350944519043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,float16,0,13.409236907958984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,float16,0,14.26797866821289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,float16,0,13.54571787516276
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,fp8,0,4.851999918619792
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,float16,0,6.574170430501302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,float16,0,7.047807693481445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,float16,0,6.528720219930013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,fp8,0,6.087930679321289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,fp8,0,5.345690409342448
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,float16,0,6.484485626220703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,fp8,0,5.138698577880859
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,float16,0,2.994469324747721
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,fp8,0,3.095583915710449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,float16,0,3.501898765563965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,fp8,0,5.467317581176758
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,fp8,0,3.1143468221028647
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,float16,0,6.863925298055013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,float16,0,3.0345226923624673
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,fp8,0,2.824917475382487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,fp8,0,2.6572853724161782
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,float16,0,3.1917120615641275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,float16,0,1.5558932622273762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,fp8,0,1.3579467137654622
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,fp8,0,2.673834800720215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,float16,0,3.1730934778849282
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,fp8,0,1.7800374031066895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,float16,0,1.7860533396402996
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,float16,0,1.5717013676961262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,fp8,0,1.4800160725911458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,float16,0,1.6429866154988606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,fp8,0,1.3970506985982258
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,float16,0,1.6141600608825684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,fp8,0,1.6025546391805012
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,fp8,0,6.856394449869792
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,fp8,0,7.019936243693034
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,fp8,0,7.242111841837565
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,float16,0,9.176170349121094
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,float16,0,9.358309427897135
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,float16,0,9.790074666341146
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,fp8,0,7.410602569580078
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,float16,0,9.644138971964518
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,float16,0,4.328789393107097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,fp8,0,4.132218678792317
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,fp8,0,4.4934186935424805
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,float16,0,4.497242609659831
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,float16,0,4.995146751403809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,fp8,0,3.8823572794596353
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,float16,0,4.670085271199544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,fp8,0,4.8307145436604815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,fp8,0,1.8537119229634602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,fp8,0,3.737855911254883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,float16,0,4.429264068603516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,float16,0,2.4057013193766275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,float16,0,2.152357260386149
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,fp8,0,1.817973295847575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,fp8,0,2.3647359212239585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,float16,0,2.5356906255086265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,float16,0,2.2590667406717935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,fp8,0,1.9218239784240723
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,fp8,0,2.105946699778239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,float16,0,2.3442400296529136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,fp8,0,1.1896533171335857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,float16,0,1.2082826296488445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,fp8,0,0.9424959818522135
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,float16,0,1.113205353418986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,fp8,0,0.9268853664398193
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,float16,0,1.1722666422526042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,float16,0,1.301194667816162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,fp8,0,1.1554400126139324
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,fp8,0,1.1016960144042969
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,float16,0,1.1586399873097737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,fp8,0,9.19807497660319
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,fp8,0,9.277402877807617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,fp8,0,9.987546920776367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,float16,0,12.132394154866537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,fp8,0,9.723695755004883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,float16,0,12.410692850748697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,float16,0,12.872533162434896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,float16,0,12.763195037841797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,float16,0,5.869578679402669
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,fp8,0,5.341087977091472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,fp8,0,4.785397211710612
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,float16,0,5.783231735229492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,fp8,0,6.1806081136067705
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,float16,0,6.856869379679362
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,float16,0,5.994831720987956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,fp8,0,5.779802958170573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,float16,0,2.785045305887858
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,fp8,0,4.999530792236328
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,fp8,0,2.3829867045084634
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,float16,0,6.099194844563802
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,fp8,0,2.3477813402811685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,float16,0,3.0723307927449546
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,fp8,0,3.241765340169271
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,float16,0,3.270789464314779
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,fp8,0,2.4861547152201333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,float16,0,1.4073120752970378
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,float16,0,2.974575996398926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,fp8,0,1.3025600115458171
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,float16,0,1.414346694946289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,float16,0,1.7125600179036458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,fp8,0,1.9237972895304363
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,fp8,0,1.2106346289316814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,float16,0,1.5885012944539387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,fp8,0,1.3607093493143718
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,float16,0,1.4811414082845051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,fp8,0,1.323749303817749
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,float16,0,0.7380906740824381
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,fp8,0,0.6650559902191162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,float16,0,0.8851626714070638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,float16,0,0.7460693518320719
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,fp8,0,0.6506719986597697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,float16,0,3.407072067260742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,fp8,0,0.8698986371358236
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,float16,0,0.7806133429209391
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,fp8,0,0.6954826513926188
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,float16,0,0.7779946327209473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,fp8,0,0.7010933558146158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,fp8,0,2.8652213414510093
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,fp8,0,5.269594510396321
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,float16,0,6.692192077636719
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,fp8,0,5.402432123819987
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,float16,0,7.084367752075195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,fp8,0,5.866383870442708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,float16,0,7.349178949991862
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,float16,0,7.382853190104167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,fp8,0,2.6743787129720054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,float16,0,3.6039253870646157
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,fp8,0,5.938079833984375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,float16,0,3.2555465698242188
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,fp8,0,3.9273811976114907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,float16,0,3.7170559565226235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,float16,0,4.18617598215739
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,fp8,0,2.963616053263346
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,float16,0,3.541407903035482
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,fp8,0,3.041621208190918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,float16,0,1.6185812950134277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,fp8,0,1.4738720258076985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,float16,0,1.6655680338541667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,fp8,0,1.9912427266438801
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,fp8,0,1.6292799313863118
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,float16,0,2.083797295888265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,float16,0,1.7726027170817058
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,fp8,0,1.6120853424072266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,fp8,0,1.6193440755208333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,float16,0,1.784981409708659
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,float16,0,0.8390453656514486
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,float16,0,0.8434986273447672
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,fp8,0,3.1216586430867515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,fp8,0,1.1139039993286133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,fp8,0,0.7568533420562744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,float16,0,0.9097386995951334
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,float16,0,1.055957317352295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,fp8,0,0.8148106733957926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,fp8,0,0.8033920129140218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,float16,0,0.8932106494903564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,float16,0,0.44259198506673175
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,fp8,0,0.39638400077819824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,fp8,0,0.39306668440500897
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,float16,0,0.5478026469548544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,float16,0,0.47041066487630206
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,fp8,0,0.5486186742782593
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,fp8,0,0.4349546829859416
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,fp8,0,0.43161598841349286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,float16,0,0.47654398282368976
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,fp8,0,0.7621920108795166
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,fp8,0,5.080101331075032
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,fp8,0,5.258442560831706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,float16,0,0.44789334138234455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,float16,0,6.31009038289388
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,float16,0,6.340181350708008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,fp8,0,6.00050163269043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,float16,0,7.255178451538086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,float16,0,3.0235093434651694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,fp8,0,3.1225814819335938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,float16,0,3.1250025431315103
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,float16,0,7.014954884847005
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,fp8,0,3.035178820292155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,fp8,0,4.223269462585449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,float16,0,4.301210721333821
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,fp8,0,5.945834477742513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,fp8,0,3.0193812052408853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,float16,0,3.527381261189779
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,float16,0,3.4056320190429688
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,fp8,0,3.7395413716634116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,float16,0,1.5527733167012532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,fp8,0,1.303008000055949
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,float16,0,2.134064038594564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,float16,0,1.5787679354349773
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,fp8,0,2.1372267405192056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,fp8,0,1.5967146555582683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,float16,0,1.7979626655578613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,fp8,0,1.5511199633280437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,fp8,0,0.7959466775258383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,float16,0,0.7799413204193115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,float16,0,0.8079626560211182
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,fp8,0,1.5775893529256184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,float16,0,1.7240907351175945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,fp8,0,0.696021318435669
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,float16,0,1.079200029373169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,float16,0,0.8765707015991211
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,fp8,0,1.16157865524292
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,fp8,0,0.8060586452484131
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,float16,0,0.8761119842529297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,float16,0,0.40668265024820965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,fp8,0,0.3793066740036011
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,float16,0,0.5599413315455118
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,fp8,0,0.36794133981068927
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,float16,0,0.419487992922465
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,fp8,0,0.5680533250172933
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,float16,0,0.45210667451222736
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,fp8,0,0.42240532239278156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,float16,0,0.22295467058817545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,fp8,0,0.4345066547393799
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,fp8,0,0.20403200387954712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,float16,0,0.2276159922281901
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,fp8,0,0.2117919921875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,fp8,0,0.22779732942581177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,fp8,0,0.30876266956329346
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,float16,0,0.24940800666809082
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,float16,0,0.24774932861328125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,fp8,0,0.23009065786997476
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,fp8,0,0.7840800285339355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,fp8,0,3.0958614349365234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,float16,0,3.68830935160319
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,float16,0,3.7604052225748696
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,fp8,0,3.220010757446289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,float16,0,0.4567840099334717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,float16,0,4.20193608601888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,float16,0,0.3219359914461772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,float16,0,1.8154293696085613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,fp8,0,3.8434718449910483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,fp8,0,1.5624213218688965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,fp8,0,2.808128039042155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,float16,0,2.725071907043457
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,fp8,0,3.8803841272989907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,fp8,0,1.756666660308838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,float16,0,1.8448479970296223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,float16,0,2.128671964009603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,fp8,0,1.848709265391032
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,float16,0,2.147482713063558
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,fp8,0,1.9368480046590169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,float16,0,0.9166293144226074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,float16,0,0.9435839653015137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,float16,0,1.3807039260864258
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,fp8,0,0.9084266821543375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,fp8,0,0.9767306645711263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,float16,0,1.0629013379414876
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,float16,0,1.0761653582255046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,fp8,0,0.9914133548736572
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,float16,0,4.469578742980957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,float16,0,0.47322134176890057
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,fp8,0,0.42209064960479736
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,float16,0,0.4926186800003052
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,float16,0,0.704528013865153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,fp8,0,0.44312532742818195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,float16,0,0.5416160027186075
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,fp8,0,0.5201760133107504
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,fp8,0,0.7304426829020182
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,float16,0,0.5410933494567871
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,float16,0,0.25389333566029865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,fp8,0,0.5098400115966797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,fp8,0,0.23216533660888672
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,float16,0,0.25922133525212604
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,fp8,0,0.8007520039876302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,fp8,0,0.24004799127578735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,float16,0,0.2906186580657959
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,float16,0,0.3666613499323527
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,fp8,0,0.2755146622657776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,fp8,0,0.27746667464574176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,fp8,0,1.4373280207316081
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,fp8,0,0.13517333070437113
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,fp8,0,0.21331199010213217
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,float16,0,0.14416000247001648
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,float16,0,0.21142399311065674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,fp8,0,0.13918933272361755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,float16,0,0.156031996011734
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,fp8,0,0.15221333503723145
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,float16,0,0.15983999768892923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,float16,0,0.2911146680514018
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,fp8,0,0.3872053225835164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,fp8,0,3.1260159810384116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,float16,0,3.638607978820801
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,float16,0,3.7603413263956704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,fp8,0,3.303285280863444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,float16,0,0.1474720040957133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,fp8,0,0.15014400084813437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,float16,0,1.8173227310180664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,fp8,0,1.5837599436442058
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,float16,0,4.453253428141276
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,fp8,0,4.15389347076416
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,float16,0,4.310144106547038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,fp8,0,4.047408103942871
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,float16,0,1.8785279591878254
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,fp8,0,1.6660319964090984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,fp8,0,3.227386792500814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,float16,0,3.0284639994303384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,fp8,0,1.9372639656066895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,float16,0,2.2680746714274087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,float16,0,2.220208009084066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,fp8,0,1.9868532816569011
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,fp8,0,0.8062506516774496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,float16,0,0.9388213157653809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,float16,0,0.9147733052571615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,fp8,0,0.921674648920695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,float16,0,1.10862930615743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,fp8,0,1.0025280316670735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,fp8,0,1.0215253035227458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,float16,0,1.1210453510284424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,float16,0,1.5201652844746907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,fp8,0,1.6302666664123535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,float16,0,0.46109334627787274
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,fp8,0,0.4198453426361084
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,float16,0,0.4789493481318156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,float16,0,0.5694506565729777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,fp8,0,0.8348320325215658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,fp8,0,0.4442773262659709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,float16,0,0.768501361211141
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,float16,0,0.5578560034434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,fp8,0,0.5252373218536377
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,float16,0,0.24920000632603964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,fp8,0,0.23054933547973633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,fp8,0,0.23932800690333048
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,float16,0,0.2951200008392334
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,float16,0,0.39658665657043457
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,fp8,0,0.4320266644159953
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,float16,0,0.29365867376327515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,fp8,0,0.2818079988161723
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,fp8,0,0.28940800825754803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,fp8,0,0.1276533305644989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,float16,0,0.13993066549301147
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,float16,0,0.14216533303260803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,fp8,0,0.13217066725095114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,float16,0,0.16280532876650491
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,fp8,0,0.15084800124168396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,float16,0,0.2176533341407776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,fp8,0,0.23244800170262656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,float16,0,0.16241600116093954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,fp8,0,0.15475733081499735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,float16,0,0.08387200037638347
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,fp8,0,0.07787199815114339
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,fp8,0,0.5290133158365885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,float16,0,0.08500799536705017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,fp8,0,0.13005333145459494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,float16,0,0.09149866302808125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,float16,0,0.13338667154312134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,float16,0,0.09116266171137492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,float16,0,0.2555999954541524
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,float16,0,2.249226729075114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,float16,0,2.3238773345947266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,float16,0,2.849930763244629
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,fp8,0,2.1224212646484375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,fp8,0,0.0805920014778773
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,fp8,0,0.0885653297106425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,fp8,0,0.08866666754086812
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,fp8,0,2.5435946782430015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,fp8,0,2.6085012753804526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,float16,0,2.7642666498819985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,float16,0,1.1201386451721191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,fp8,0,1.0038879712422688
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,float16,0,1.1783733367919922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,float16,0,2.024890740712484
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,fp8,0,1.992143948872884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,fp8,0,1.0681386788686116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,fp8,0,2.2430879275004068
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,float16,0,1.4352693557739258
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,fp8,0,1.4043466250101726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,float16,0,1.4066987037658691
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,float16,0,0.5730986595153809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,fp8,0,0.5484853188196818
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,fp8,0,1.3864960670471191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,fp8,0,0.51910932858785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,float16,0,0.7186133066813151
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,fp8,0,0.6793226401011149
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,float16,0,1.0211040178934734
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,float16,0,0.7077013651529948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,fp8,0,1.135589361190796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,fp8,0,0.6793226401011149
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,float16,0,0.29678932825724286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,float16,0,0.3696906566619873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,fp8,0,0.2982719937960307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,fp8,0,0.275653342405955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,fp8,0,0.3595199982325236
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,fp8,0,0.580512007077535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,float16,0,0.3705120086669922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,fp8,0,0.360586682955424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,float16,0,0.164901336034139
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,fp8,0,0.15222932895024618
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,float16,0,0.16986666123072305
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,float16,0,0.5964746475219727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,fp8,0,0.19970667362213135
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,fp8,0,0.1606773336728414
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,float16,0,0.20017067591349283
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,float16,0,0.2740373412768046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,fp8,0,0.1968266765276591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,float16,0,0.09431466460227966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,float16,0,0.20350933074951172
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,fp8,0,0.08709333340326945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,float16,0,0.30867733558019
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,fp8,0,0.09180266658465068
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,float16,0,0.09697600205739339
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,float16,0,0.1074666678905487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,fp8,0,0.10359999537467957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,float16,0,0.109525332848231
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,fp8,0,0.10566400488217671
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,float16,0,0.06037333110968272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,float16,0,0.5194079875946045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,fp8,0,0.056234667698542275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,float16,0,0.09664000074068706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,fp8,0,0.09109866619110107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,fp8,0,0.3059200048446655
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,fp8,0,0.06316799918810527
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,fp8,0,0.06371200084686279
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,float16,0,0.15253866712252298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,float16,0,2.342202663421631
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,float16,0,0.06156266729036967
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,float16,0,2.516581376393636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,fp8,0,2.1427040100097656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,float16,0,0.06542933483918507
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,fp8,0,2.341578642527262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,fp8,0,0.1667893330256144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,fp8,0,2.9356959660847983
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,fp8,0,2.817381223042806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,float16,0,3.0369653701782227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,float16,0,3.092325210571289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,fp8,0,0.057664001981417336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,float16,0,2.389418601989746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,float16,0,1.1844053268432617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,fp8,0,2.7251466115315757
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,fp8,0,1.0870292981465657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,float16,0,1.2426133155822754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,fp8,0,1.1734453042348225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,float16,0,1.6191199620564778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,float16,0,1.5546612739562988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,fp8,0,1.4595306714375813
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,fp8,0,1.5950560569763184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,float16,0,0.5897173484166464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,fp8,0,0.5578506787618002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,float16,0,0.066021333138148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,float16,0,0.6382079919179281
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,float16,0,1.208906650543213
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,fp8,0,0.598202665646871
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,float16,0,0.788266658782959
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,fp8,0,0.7918826738993326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,fp8,0,1.37335999806722
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,fp8,0,0.7577599684397379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,float16,0,0.31260265906651813
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,fp8,0,0.697381337483724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,float16,0,0.3288426597913106
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,fp8,0,0.40573867162068683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,float16,0,0.41702401638031006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,fp8,0,0.3183679978052775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,float16,0,0.39933868249257404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,float16,0,0.17106133699417114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,fp8,0,0.39556801319122314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,fp8,0,0.15973866979281107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,fp8,0,0.17099199692408243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,fp8,0,0.36116798718770343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,float16,0,0.31777600447336835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,float16,0,0.21712533632914224
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,float16,0,0.8195626735687256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,fp8,0,0.21277866760889688
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,float16,0,0.2250666618347168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,fp8,0,0.22485333681106567
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,float16,0,0.0960640013217926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,fp8,0,0.0883840024471283
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,fp8,0,0.09346666932106018
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,float16,0,0.09959466258684795
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,fp8,0,0.11002666751543681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,fp8,0,0.29234667619069415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,fp8,0,0.19324799378712973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,float16,0,0.6115093231201172
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,float16,0,0.17128000656763712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,fp8,0,0.05530133346716563
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,fp8,0,0.11412800351778667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,fp8,0,0.10564266641934712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,float16,0,0.1804693341255188
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,float16,0,0.09562666217486064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,float16,0,0.06500266492366791
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,fp8,0,0.06474133332570393
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,float16,0,0.06563200056552887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,fp8,0,0.06625600159168243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,float16,0,0.039813332259655
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,fp8,0,0.037776000797748566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,float16,0,0.0543146679798762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,float16,0,0.04180799921353658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,fp8,0,0.03957866628964742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,float16,0,0.1195146640141805
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,float16,0,0.12308800220489502
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,float16,0,0.04399999976158142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,fp8,0,0.04364799956480662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,fp8,0,0.044362664222717285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,float16,0,0.04369066655635834
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,float16,0,0.05835733314355215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,float16,0,0.06026133398214976
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,float16,0,1.7589707374572754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,fp8,0,1.6489280064900715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,fp8,0,0.058330665032068886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,float16,0,1.9115734100341797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,fp8,0,1.8196959495544434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,float16,0,2.560138702392578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,fp8,0,2.311024030049642
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,float16,0,0.8966346581776937
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,fp8,0,0.05684266487757365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,float16,0,2.088090737660726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,fp8,0,2.4554293950398765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,float16,0,0.9629813035329183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,fp8,0,0.836197296778361
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,fp8,0,0.9219733079274496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,float16,0,2.447824001312256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,fp8,0,2.5861600240071616
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,float16,0,1.3384639422098796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,fp8,0,1.2152746518452961
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,float16,0,1.236191987991333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,fp8,0,1.2705919742584229
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,float16,0,1.0482186476389568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,float16,0,0.45551466941833496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,float16,0,0.49616531531016034
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,fp8,0,0.42654399077097577
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,float16,0,0.6518826484680176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,fp8,0,0.476032018661499
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,float16,0,0.6302133401234945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,fp8,0,0.6395573218663534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,float16,0,0.2439253330230713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,float16,0,0.536074678103129
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,fp8,0,0.22568533817927042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,fp8,0,0.2462559938430786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,float16,0,0.2624746759732564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,float16,0,0.3500373363494873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,fp8,0,0.6290133396784464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,fp8,0,0.34802667299906415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,float16,0,0.3315466642379761
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,fp8,0,0.3395306666692098
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,float16,0,0.1344480017820994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,fp8,0,0.12684266765912375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,fp8,0,0.32501333951950073
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,float16,0,0.14406399925549826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,float16,0,0.2802346746126811
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,float16,0,0.17960000038146973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,float16,0,0.17771732807159424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,fp8,0,0.6236159801483154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,fp8,0,0.18870399395624796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,float16,0,0.07650133470694225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,fp8,0,0.06923733154932658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,float16,0,0.08095466593901317
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,float16,0,0.1502346694469452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,fp8,0,0.17276267210642496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,fp8,0,0.07458133498827617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,fp8,0,1.238111972808838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,float16,0,0.10174933075904846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,fp8,0,0.09355200330416362
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,float16,0,0.0985599954922994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,float16,0,0.046015997727712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,float16,0,0.047279998660087585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,fp8,0,0.13619732856750488
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,fp8,0,0.0441599984963735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,float16,0,0.05216533442338308
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,fp8,0,0.09289600451787312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,fp8,0,0.18703999121983847
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,fp8,0,0.052149335543314614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,fp8,0,0.0539626677831014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,float16,0,0.052784000833829246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,float16,0,0.031583999594052635
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,fp8,0,0.029311999678611755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,float16,0,0.03173333406448364
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,fp8,0,0.031013332307338715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,fp8,0,0.03572800010442734
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,float16,0,0.043765331308046974
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,fp8,0,0.04990399877230326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,float16,0,0.03572266548871994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,float16,0,0.021482666333516438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,fp8,0,0.021141332884629566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,fp8,0,0.09251200159390767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,float16,0,0.021168000996112823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,fp8,0,0.04199466605981191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,fp8,0,0.03161066770553589
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,fp8,0,0.021082667013009388
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,float16,0,0.027488000690937042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,float16,0,0.02342933416366577
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,fp8,0,0.02310933421055476
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,float16,0,0.023258666197458904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,fp8,0,0.02334933231274287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,float16,0,0.09340799848238628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,float16,0,0.034101332227389015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,float16,0,0.7244213422139486
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,float16,0,0.7990293502807617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,fp8,0,0.0358240008354187
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,fp8,0,0.7765333652496338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,float16,0,1.1584586302439372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,fp8,0,1.0867839654286702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,float16,0,0.3736213445663452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,float16,0,1.1197653611501057
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,fp8,0,0.3518720070521037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,fp8,0,1.1646613279978435
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,fp8,0,1.196079969406128
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,float16,0,0.40556800365448
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,float16,0,0.9599040349324545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,fp8,0,0.40241066614786786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,fp8,0,0.6958239873250326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,float16,0,0.19605332612991333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,float16,0,0.5668373505274454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,fp8,0,0.554256002108256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,fp8,0,0.6112853288650513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,float16,0,0.5916106700897217
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,fp8,0,0.589194655418396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,fp8,0,0.18699200948079428
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,fp8,0,0.20629332462946573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,float16,0,0.4883306821187337
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,float16,0,0.3088480035463969
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,float16,0,0.30215466022491455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,fp8,0,0.28963732719421387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,fp8,0,0.2909546693166097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,fp8,0,0.30619200070699054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,fp8,0,0.10454400380452473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,float16,0,0.2561279932657878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,float16,0,0.11034133036931355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,float16,0,0.11886933445930481
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,float16,0,0.15525866548220316
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,fp8,0,0.16081600387891135
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,float16,0,0.0642986645301183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,float16,0,0.16977600256601968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,float16,0,0.2139093279838562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,fp8,0,0.059077332417170204
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,float16,0,0.1381280024846395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,fp8,0,0.1635040044784546
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,float16,0,0.0683840016523997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,float16,0,0.08844799796740214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,fp8,0,0.08690667152404785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,fp8,0,0.08110933502515157
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,float16,0,0.03707200040419897
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,fp8,0,0.035573333501815796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,float16,0,0.03774933268626531
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,fp8,0,0.03761066744724909
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,float16,0,0.07619733115037282
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,float16,0,0.04386133452256521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,fp8,0,0.045642669002215065
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,fp8,0,0.116565336783727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,float16,0,0.04383466641108195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,fp8,0,0.04576000074545542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,float16,0,0.026682667434215546
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,fp8,0,0.025429333249727886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,float16,0,0.02733866622050603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,fp8,0,0.1576640009880066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,fp8,0,0.02752533306678136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,float16,0,0.029813334345817566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,float16,0,0.03846933444341024
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,fp8,0,0.046053335070610046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,fp8,0,0.0313226655125618
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,float16,0,0.029578665892283123
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,fp8,0,0.03146133323510488
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,float16,0,0.017344000438849132
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,float16,0,0.01740266631046931
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,fp8,0,0.06422933439413707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,fp8,0,0.017029333859682083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,float16,0,0.019274666905403137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,fp8,0,0.019498666127522785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,float16,0,0.08932800094286601
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,float16,0,0.019487999379634857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,fp8,0,0.030031998952229817
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,fp8,0,0.021242665747801464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,fp8,0,0.09277866284052531
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,float16,0,0.017445333302021027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,float16,0,0.017082666357358296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,fp8,0,0.01741333305835724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,float16,0,0.019199999670187633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,fp8,0,0.01724799970785777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,float16,0,0.4405119816462199
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,fp8,0,0.41548800468444824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,float16,0,0.47494399547576904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,fp8,0,0.019205333044131596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,float16,0,0.01762666677435239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,fp8,0,0.01714133347074191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,fp8,0,0.6246773401896158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,fp8,0,0.45979734261830646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,float16,0,0.64028267065684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,float16,0,0.017184000462293625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,float16,0,0.6217066844304403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,float16,0,0.2518826723098755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,float16,0,0.5456586678822836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,float16,0,0.23228800296783447
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,fp8,0,0.61899733543396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,fp8,0,0.2404266595840454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,float16,0,0.3331093390782674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,float16,0,0.0235359991590182
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,fp8,0,0.6826186974843343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,fp8,0,0.3545813163121541
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,float16,0,0.12974400321642557
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,float16,0,0.3270453413327535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,fp8,0,0.3278346657752991
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,float16,0,0.13476799925168356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,fp8,0,0.12795199950536093
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,float16,0,0.2683466672897339
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,fp8,0,0.31752000252405804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,float16,0,0.17187732458114624
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,fp8,0,0.11931733290354411
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,fp8,0,0.2172373334566752
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,fp8,0,0.1792746583620707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,float16,0,0.17788799603780112
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,fp8,0,0.18158932526906332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,float16,0,0.07167999943097432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,fp8,0,0.0644053320089976
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,fp8,0,0.06938666601975758
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,fp8,0,0.16687466700871786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,float16,0,0.15903466939926147
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,float16,0,0.09381332993507385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,fp8,0,0.09506133198738098
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,fp8,0,0.08943466345469157
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,float16,0,0.03990933299064636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,float16,0,0.09430399537086487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,fp8,0,0.0391146664818128
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,fp8,0,0.04022400081157684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,float16,0,0.042037333051363625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,float16,0,0.0848586658636729
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,fp8,0,0.048453330993652344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,float16,0,0.048581331968307495
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,float16,0,0.04789333542188009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,float16,0,0.025807999074459076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,fp8,0,0.05037866532802582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,fp8,0,0.023541333774725597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,float16,0,0.025653332471847534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,fp8,0,0.044719999035199486
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,float16,0,0.029333333174387615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,float16,0,0.03933866570393244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,fp8,0,0.029653333127498627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,float16,0,0.02938666691382726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,fp8,0,0.02972800036271413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,float16,0,0.018911999960740406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,float16,0,0.07452799876530965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,fp8,0,0.018138666947682697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,float16,0,0.01924266666173935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,fp8,0,0.01915733392039935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,float16,0,0.020634666085243225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,float16,0,0.02346133440732956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,fp8,0,0.0295413335164388
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,float16,0,0.019786667078733444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,fp8,0,0.02145066608985265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,float16,0,0.013034666577974955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,float16,0,0.01339200014869372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,fp8,0,0.0900266667207082
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,fp8,0,0.01404800017674764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,float16,0,0.013477332890033722
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,fp8,0,0.019296000401178997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,fp8,0,0.013359999905029932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,float16,0,0.014981333166360855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,float16,0,0.01339200014869372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,fp8,0,0.02593066543340683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,fp8,0,0.013301332791646322
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,fp8,0,0.0161013330022494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,float16,0,0.013744000345468521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,float16,0,0.015706667055686314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,fp8,0,0.021104000508785248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,float16,0,0.013189333180586496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,float16,0,0.015216000378131866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,fp8,0,0.3216426571210225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,float16,0,0.35656531651814777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,float16,0,0.37571199735005695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,fp8,0,0.34229334195454914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,fp8,0,0.014175999909639359
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,float16,0,0.4475466807683309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,fp8,0,0.4268480141957601
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,fp8,0,0.42557334899902344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,float16,0,0.4505813519159953
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,float16,0,0.35602664947509766
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,fp8,0,0.013343999783198038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,float16,0,0.189903994401296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,fp8,0,0.17147733767827353
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,float16,0,0.1990506649017334
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,float16,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,float16,0,0.23314666748046875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,fp8,0,0.1818293333053589
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,fp8,0,0.23105067014694214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,float16,0,0.23652799924214682
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,fp8,0,0.3716213305791219
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,float16,0,0.10337600111961365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,fp8,0,0.08981866637865703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,float16,0,0.108624001344045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,float16,0,0.1263146698474884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,fp8,0,0.11734400192896526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,fp8,0,0.1914773384730021
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,float16,0,0.12686933080355325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,fp8,0,0.09455466270446777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,float16,0,0.05845866600672404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,fp8,0,0.05213866631189982
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,float16,0,0.1011893351872762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,fp8,0,0.10405332843462627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,fp8,0,0.22381333510080972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,float16,0,0.05669866502285004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,fp8,0,0.11033599575360616
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,fp8,0,0.06224533418814341
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,float16,0,0.06442133088906606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,float16,0,0.033573334415753685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,fp8,0,0.0313226655125618
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,fp8,0,0.06241600215435028
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,float16,0,0.035360001027584076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,fp8,0,0.03346133232116699
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,float16,0,0.17386666933695474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,float16,0,0.04789333542188009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,float16,0,0.03758399933576584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,fp8,0,0.05217599868774414
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,fp8,0,0.037658666570981346
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,float16,0,0.02120000123977661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,fp8,0,0.037605332831541695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,fp8,0,0.020954666038354237
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,float16,0,0.02160000056028366
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,fp8,0,0.02111999938885371
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,fp8,0,0.03161066770553589
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,float16,0,0.027258666853109997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,fp8,0,0.023226665953795116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,float16,0,0.0235359991590182
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,fp8,0,0.05341866612434387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,float16,0,0.06385600070158641
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,float16,0,0.017082666357358296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,float16,0,0.01706133286158244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,fp8,0,0.02141333371400833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,float16,0,0.017071999609470367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,fp8,0,0.01727466657757759
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,float16,0,0.037578667203585304
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,float16,0,0.017375999440749485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,fp8,0,0.01732800031701724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,float16,0,0.013232000172138214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,float16,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,float16,0,0.023344000180562336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,float16,0,0.015034666905800501
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,fp8,0,0.013183999806642532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,float16,0,0.013114667187134424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,fp8,0,0.02317333221435547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,float16,0,0.013167999684810638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,float16,0,0.013210666676362356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,float16,0,0.019296000401178997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,float16,0,0.01173866664369901
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,fp8,0,0.015173333386580149
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,float16,0,0.013237333546082178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,float16,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,fp8,0,0.015664000064134598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,float16,0,0.32654400666554767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,fp8,0,0.2827039957046509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,float16,0,0.3400426705678304
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,fp8,0,0.29576534032821655
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,float16,0,0.37958399454752606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,float16,0,0.17813867330551147
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,float16,0,0.3758933146794637
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,float16,0,0.012965332716703415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,fp8,0,0.3457760016123454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,fp8,0,0.3426773150761922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,float16,0,0.24578134218851724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,fp8,0,0.24952532847722372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,fp8,0,0.14645866552988687
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,float16,0,0.18082133928934732
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,fp8,0,0.15131200353304544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,float16,0,0.19970667362213135
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,fp8,0,0.1709386706352234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,float16,0,0.13702932993570963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,fp8,0,0.07893866797288258
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,fp8,0,0.13065600395202637
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,fp8,0,0.17599999904632568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,float16,0,0.19764800866444907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,float16,0,0.10217600067456563
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,float16,0,0.09577066699663798
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,fp8,0,0.089519997437795
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,fp8,0,0.08981333176294963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,float16,0,0.10221866766611735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,fp8,0,0.013183999806642532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,float16,0,0.05188799897829691
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,fp8,0,0.04404266675313314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,float16,0,0.055861334005991616
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,float16,0,0.0545066644748052
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,fp8,0,0.050106664498647056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,fp8,0,0.04742933313051859
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,fp8,0,0.06678399940331776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,float16,0,0.05596800148487091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,fp8,0,0.051818668842315674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,fp8,0,0.02794666588306427
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,float16,0,0.03212266663710276
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,fp8,0,0.02757333219051361
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,float16,0,0.03756266583998998
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,float16,0,0.033813332517941795
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,float16,0,0.09325866897900899
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,fp8,0,0.030671998858451843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,fp8,0,0.03979199876387914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,float16,0,0.033370666205883026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,fp8,0,0.03109866629044215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,float16,0,0.019567999988794327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,fp8,0,0.019285333653291065
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,float16,0,0.02051199972629547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,fp8,0,0.023290666441122692
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,fp8,0,0.01939733326435089
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,float16,0,0.019343999524911244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,fp8,0,0.01939733326435089
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,fp8,0,0.08062933385372162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,float16,0,0.021210665504137676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,float16,0,0.023056000471115112
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,float16,0,0.015333333363135656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,fp8,0,0.015423999478419622
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,float16,0,0.01525866612792015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,fp8,0,0.015263999501864115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,float16,0,0.01544533297419548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,float16,0,0.07622933387756348
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,float16,0,0.01525866612792015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,fp8,0,0.019226666539907455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,float16,0,0.012901333471139273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,fp8,0,0.015295999745527903
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,float16,0,0.03186666717131933
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,float16,0,0.01309866706530253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,float16,0,0.012890666723251343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,float16,0,0.0120319997270902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,float16,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,float16,0,0.013269333789745966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,fp8,0,0.015184000134468079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,float16,0,0.013183999806642532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,float16,0,0.01098666712641716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,float16,0,0.017349333812793095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,fp8,0,0.2590346733729045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,fp8,0,0.2632426619529724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,float16,0,0.33078932762145996
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,fp8,0,0.285589337348938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,float16,0,0.32126933336257935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,float16,0,0.34242133299509686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,float16,0,0.1686506668726603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,float16,0,0.34749865531921387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,fp8,0,0.2868640025456746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,fp8,0,0.015498666713635126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,float16,0,0.2082293430964152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,fp8,0,0.1872053345044454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,fp8,0,0.1362613340218862
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,fp8,0,0.13593600193659464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,float16,0,0.18291199207305908
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,float16,0,0.16995733976364136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,fp8,0,0.14644267161687216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,fp8,0,0.07367466886838277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,float16,0,0.18148799737294516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,fp8,0,0.07539199789365132
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,float16,0,0.092031995455424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,float16,0,0.09567999839782715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,fp8,0,0.1462399959564209
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,fp8,0,0.07905599971612294
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,float16,0,0.11044800281524658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,float16,0,0.09294933080673218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,float16,0,0.050848002235094704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,fp8,0,0.07976000010967255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,fp8,0,0.04312533140182495
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,fp8,0,0.05402133365472158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,fp8,0,0.0432533323764801
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,float16,0,0.05788266658782959
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,float16,0,0.05323199927806854
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,fp8,0,0.04417600234349569
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,fp8,0,0.04601066807905833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,float16,0,0.031541332602500916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,float16,0,0.05221333106358846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,fp8,0,0.027471999327341717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,float16,0,0.031557333966096245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,fp8,0,0.027386667827765148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,fp8,0,0.027679999669392902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,float16,0,0.0313226655125618
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,fp8,0,0.09819733103116353
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,float16,0,0.03143999973932902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,fp8,0,0.027445333699385326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,float16,0,0.02022933339079221
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,fp8,0,0.01922133316596349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,float16,0,0.020319999506076176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,fp8,0,0.01930133377512296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,float16,0,0.021525333325068157
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,fp8,0,0.021407999098300934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,float16,0,0.02056533346573512
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,fp8,0,0.019440000255902607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,float16,0,0.05123733480771383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,float16,0,0.01952533299724261
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,float16,0,0.01505600040157636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,float16,0,0.015008000036080679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,fp8,0,0.015317333241303762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,fp8,0,0.018933333456516266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,float16,0,0.01515199989080429
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,fp8,0,0.015002666662136713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,fp8,0,0.03183999905983607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,float16,0,0.03276800115903219
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,float16,0,0.09147733449935913
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,float16,0,0.01099733387430509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,float16,0,0.01340266689658165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,fp8,0,0.01534933348496755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,float16,0,0.011365332951148352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,float16,0,0.013061333447694778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,fp8,0,0.019039999693632126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,float16,0,0.01302933320403099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,fp8,0,0.013359999905029932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,float16,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,fp8,0,0.011365332951148352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,fp8,0,0.014954666296641031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,fp8,0,0.012042666474978128
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,float16,0,0.012938667088747025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,float16,0,0.01119999960064888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,0,0.023178666830062866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,0,0.02924799919128418
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,0,0.0195573332409064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,0,0.02607999990383784
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,fp8,0,0.013130666067202887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,fp8,0,0.06378133098284404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,0,0.015696000307798386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,0,0.015322666615247726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,0,0.019173332800467808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,0,0.017344000438849132
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,fp8,0,0.03577066709597906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,0,0.014858666807413101
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,0,0.013536000003417334
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,float16,0,0.015354666858911514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,float16,0,0.025616000096003216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,fp8,0,0.024085332949956257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,0,0.011077333241701126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,float16,0,0.08116800089677174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,0,0.011237333218256632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,0,0.010010666524370512
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,0,0.00997866690158844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,0,0.01044800008336703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,0,0.011114666859308878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,float16,0,0.04634666442871094
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,float16,0,0.01404800017674764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,0,0.009072000160813332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,0,0.010842667271693548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,0,0.008949333180983862
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,float16,0,0.01714133347074191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,0,0.00892800030608972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,float16,0,0.012869333227475485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,0,0.009039999917149544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,0,0.010890666395425797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,0,0.009109333157539368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,float16,0,0.011173332730929056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,fp8,0,0.019050666441520054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,0,0.009216000015536943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,float16,0,0.01894933357834816
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,0,0.00914666677514712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,0,0.008938666433095932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,fp8,0,0.013232000172138214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,float16,0,0.011402666568756104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,0,0.01129066695769628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,0,0.00915733352303505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,fp8,0,0.015184000134468079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,0,0.00927466650803884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,0,0.011087999989589056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,0,0.009279999881982803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,fp8,0,14.291328430175781
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,fp8,0,14.392229715983072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,float16,0,19.184463500976562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,float16,0,19.406164805094402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,fp8,0,14.858144124348959
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,float16,0,20.019140879313152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,fp8,0,8.469658533732096
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,float16,0,10.418816248575846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,float16,0,19.614064534505207
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,fp8,0,7.158010482788086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,float16,0,9.566383997599283
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,float16,0,9.580586751302084
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,fp8,0,14.896464029947916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,fp8,0,7.5747629801432295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,fp8,0,3.570224126180013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,float16,0,4.34445317586263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,fp8,0,7.552127838134766
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,fp8,0,7.647104263305664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,fp8,0,4.670960108439128
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,float16,0,9.91482162475586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,float16,0,9.856378555297852
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,float16,0,4.7027788162231445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,fp8,0,3.8065547943115234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,float16,0,4.834181467692058
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,float16,0,4.486677487691243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,fp8,0,4.5345760981241865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,float16,0,2.7372213999430337
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,fp8,0,2.5446720123291016
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,float16,0,2.263024012247721
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,float16,0,5.2140852610270185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,fp8,0,2.380410671234131
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,float16,0,2.6408586502075195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,fp8,0,2.123040040334066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,float16,0,2.361392021179199
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,fp8,0,2.0202132860819497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,fp8,0,3.899621327718099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,fp8,0,1.952448050181071
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,float16,0,2.361701329549154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,fp8,0,8.298090616861979
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,fp8,0,8.409215927124023
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,float16,0,10.920176188151041
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,float16,0,11.063915252685547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,fp8,0,8.737290700276693
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,fp8,0,8.770362854003906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,float16,0,11.669595082600912
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,float16,0,11.122634887695312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,fp8,0,4.4870452880859375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,float16,0,5.846752166748047
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,fp8,0,5.220042546590169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,float16,0,5.259178797403972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,fp8,0,4.159728050231934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,float16,0,5.588026682535808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,float16,0,5.345146814982097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,fp8,0,4.945834795633952
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,float16,0,2.562277317047119
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,fp8,0,2.651306629180908
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,float16,0,2.807135899861654
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,float16,0,2.9900693893432617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,fp8,0,2.832341194152832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,float16,0,5.381727854410808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,fp8,0,2.126512050628662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,float16,0,2.7068265279134116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,fp8,0,5.165818532307942
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,float16,0,1.311738650004069
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,fp8,0,1.2272906303405762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,float16,0,1.3091946442921956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,fp8,0,2.375429312388102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,float16,0,2.6895678838094077
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,fp8,0,1.1161226431528728
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,float16,0,1.5131093660990398
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,fp8,0,1.6549280484517415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,float16,0,1.4038933118184407
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,fp8,0,1.190399964650472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,fp8,0,1.2470666567484539
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,float16,0,1.392149289449056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,fp8,0,2.3731253941853843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,fp8,0,5.776992162068685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,fp8,0,5.955482482910156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,float16,0,7.333152135213216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,float16,0,7.698357264200847
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,fp8,0,6.18891716003418
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,float16,0,8.048949559529623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,float16,0,7.91323725382487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,fp8,0,6.259034474690755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,float16,0,3.5691839853922525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,float16,0,4.257888158162435
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,fp8,0,3.8714987436930337
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,float16,0,4.111018816630046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,fp8,0,3.173551877339681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,float16,0,3.8778772354125977
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,fp8,0,3.433898607889811
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,float16,0,2.167840003967285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,float16,0,3.861839930216471
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,float16,0,1.928869406382243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,fp8,0,3.4882240295410156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,fp8,0,1.5100693702697754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,fp8,0,3.474933306376139
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,fp8,0,1.9768266677856445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,fp8,0,1.5342453320821126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,float16,0,1.8444159825642903
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,float16,0,1.9803360303243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,fp8,0,1.6690187454223633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,fp8,0,1.6415626207987468
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,float16,0,0.9421333471934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,float16,0,1.9075039227803547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,fp8,0,0.8656533559163412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,float16,0,0.9428266684214274
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,fp8,0,0.8081013361612955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,fp8,0,1.031488021214803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,fp8,0,0.8911306858062744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,float16,0,0.9957760175069174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,float16,0,1.0003573099772136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,fp8,0,0.8755306402842203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,fp8,0,7.553690592447917
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,float16,0,1.1181706587473552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,fp8,0,7.73739751180013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,float16,0,10.219477335611979
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,fp8,0,8.516250610351562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,float16,0,10.130709330240885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,float16,0,10.602309544881185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,float16,0,10.780778249104818
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,fp8,0,3.795562744140625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,float16,0,4.727701187133789
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,fp8,0,4.387605349222819
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,float16,0,4.969488143920898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,fp8,0,5.301989237467448
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,float16,0,5.840506871541341
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,float16,0,5.432512283325195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,fp8,0,8.318021138509115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,fp8,0,4.328805287679036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,float16,0,2.3554879824320474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,fp8,0,2.3763252894083657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,float16,0,2.3369226455688477
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,fp8,0,4.203920046488444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,fp8,0,2.6919358571370444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,float16,0,2.9359305699666343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,float16,0,5.058352152506511
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,fp8,0,1.9850880304972331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,float16,0,2.5373013814290366
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,fp8,0,2.3576265970865884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,fp8,0,1.0049066543579102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,float16,0,2.482879956563314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,float16,0,1.2444000244140625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,fp8,0,2.1425973574320474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,fp8,0,1.040826638539632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,float16,0,1.2216853300730388
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,fp8,0,1.4509226481119792
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,float16,0,1.4693652788798015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,fp8,0,1.232202688852946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,float16,0,1.2965493202209473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,fp8,0,1.1130080223083496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,float16,0,1.3287733395894368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,float16,0,0.6294399897257487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,fp8,0,0.554533322652181
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,fp8,0,0.5470240116119385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,float16,0,0.6654933293660482
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,fp8,0,0.7329546610514323
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,fp8,0,0.6501493453979492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,float16,0,0.7600693702697754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,float16,0,0.6700639724731445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,fp8,0,0.5971999963124593
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,fp8,0,4.453824043273926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,fp8,0,4.62938658396403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,float16,0,5.794384002685547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,float16,0,5.706666946411133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,fp8,0,5.215701421101888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,float16,0,6.04855473836263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,float16,0,6.1672318776448565
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,float16,0,2.9094934463500977
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,fp8,0,2.260741392771403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,float16,0,2.732810656229655
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,fp8,0,5.156400044759114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,float16,0,3.5830666224161782
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,fp8,0,2.9378026326497397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,fp8,0,3.5877065658569336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,float16,0,3.030186653137207
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,fp8,0,2.573472023010254
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,fp8,0,2.568864027659098
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,float16,0,1.3658773104349773
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,float16,0,0.6379733482996622
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,float16,0,1.8152052561442058
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,fp8,0,1.2550773620605469
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,float16,0,1.4040106137593586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,fp8,0,1.7400320370992024
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,fp8,0,1.1918240388234456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,fp8,0,1.3255786895751953
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,float16,0,1.5066879590352376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,float16,0,0.7059786319732666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,fp8,0,0.6113226811091105
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,float16,0,1.5275306701660156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,float16,0,0.7146879831949869
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,fp8,0,1.3623733520507812
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,float16,0,0.9185439745585123
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,fp8,0,0.8929653167724609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,fp8,0,0.6285226742426554
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,fp8,0,0.7119146982828776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,float16,0,0.783029317855835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,float16,0,0.7886133193969727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,fp8,0,0.707045316696167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,float16,0,0.3739893436431885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,fp8,0,0.3388906717300415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,float16,0,0.385045329729716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,fp8,0,0.4798613389333089
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,float16,0,2.9695841471354165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,fp8,0,0.3463519811630249
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,fp8,0,0.3845653136571248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,float16,0,0.48599998156229657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,float16,0,0.411135991414388
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,fp8,0,0.389082670211792
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,float16,0,0.4113493363062541
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,fp8,0,4.320586522420247
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,float16,0,5.134362538655599
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,float16,0,5.443909327189128
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,fp8,0,5.231312115987142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,float16,0,6.020373026529948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,float16,0,6.008319854736328
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,float16,0,2.5278080304463706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,fp8,0,2.6044160525004068
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,float16,0,2.6518400510152182
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,fp8,0,2.2634827295939126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,fp8,0,3.747119903564453
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,fp8,0,4.487584114074707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,float16,0,4.061423937479655
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,fp8,0,5.123050689697266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,float16,0,2.943375905354818
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,fp8,0,2.649354616800944
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,fp8,0,2.7028748194376626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,float16,0,3.0230347315470376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,fp8,0,1.1115893522898357
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,float16,0,1.2962559858957927
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,float16,0,1.3260266780853271
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,fp8,0,1.8618826866149902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,fp8,0,1.166752020517985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,float16,0,1.4723572731018066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,float16,0,1.871498743693034
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,fp8,0,1.403429349263509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,float16,0,0.6579786539077759
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,fp8,0,1.3917333285013835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,float16,0,1.5147147178649902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,fp8,0,0.5856586694717407
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,fp8,0,0.6492266654968262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,float16,0,0.6894400119781494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,float16,0,0.9562400182088217
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,float16,0,0.759989341100057
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,float16,0,0.7481066385904948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,fp8,0,0.697989304860433
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,fp8,0,0.32018133004506427
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,float16,0,0.3546239932378133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,float16,0,0.35824533303578693
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,fp8,0,0.3279786705970764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,fp8,0,0.3757386604944865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,fp8,0,0.5026293198267618
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,float16,0,0.49324266115824383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,float16,0,0.3969866832097371
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,fp8,0,0.37054399649302167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,float16,0,0.19819732507069907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,fp8,0,0.18095467487970987
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,float16,0,0.19836799303690592
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,fp8,0,0.18730133771896362
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,float16,0,0.2174773414929708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,fp8,0,0.2018773357073466
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,fp8,0,0.20544000466664633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,float16,0,0.22181334098180136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,fp8,0,0.7062239646911621
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,fp8,0,0.9529333114624023
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,float16,0,3.057594617207845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,fp8,0,2.634709358215332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,float16,0,3.2014026641845703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,fp8,0,2.768090565999349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,float16,0,3.7096160252889
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,fp8,0,0.27621867259343463
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,fp8,0,3.311306635538737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,float16,0,3.5983947118123374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,fp8,0,3.518757184346517
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,float16,0,1.553866704305013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,fp8,0,2.499311923980713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,fp8,0,1.3523680369059246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,fp8,0,1.4141279856363933
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,float16,0,0.3965760072072347
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,float16,0,1.6090720494588215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,float16,0,1.8332319259643555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,fp8,0,1.6512959798177083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,float16,0,1.7939626375834148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,float16,0,0.7858239809672037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,fp8,0,0.6905173460642496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,fp8,0,1.6653493245442708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,float16,0,0.8049653371175131
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,float16,0,1.200704018274943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,fp8,0,1.2600693702697754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,fp8,0,0.86516801516215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,float16,0,0.9279253482818604
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,float16,0,2.379151980082194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,float16,0,0.9135146935780843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,fp8,0,0.8574026425679525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,fp8,0,0.36591466267903644
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,float16,0,0.4248053232828776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,float16,0,0.4039413531621297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,float16,0,0.6172053416570028
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,fp8,0,0.38159998257954914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,float16,0,0.4732319911321004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,fp8,0,0.45961066087086994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,float16,0,0.4790240128835042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,fp8,0,0.459114670753479
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,float16,0,0.26741333802541095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,float16,0,0.22726933161417642
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,fp8,0,0.7207199732462565
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,fp8,0,0.20109333594640097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,float16,0,0.22820266087849936
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,float16,0,0.32763733466466266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,fp8,0,0.21074666579564413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,fp8,0,0.247216006120046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,float16,0,0.2596000035603841
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,float16,0,0.12771733601888022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,fp8,0,0.12083733081817627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,fp8,0,0.19364267587661743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,float16,0,0.1992266575495402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,float16,0,0.13024533788363138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,fp8,0,0.12362666924794515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,fp8,0,0.6439679861068726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,float16,0,0.13981333374977112
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,fp8,0,0.13521066308021545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,float16,0,0.14376533031463623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,fp8,0,0.34303998947143555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,float16,0,0.25935999552408856
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,float16,0,3.0717385609944663
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,fp8,0,2.687941233317057
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,fp8,0,0.24904000759124756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,fp8,0,2.8557812372843423
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,float16,0,3.172085444132487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,fp8,0,0.13686399658521017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,float16,0,3.787343978881836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,fp8,0,3.508271853129069
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,float16,0,1.5328373908996582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,float16,0,3.850799878438314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,fp8,0,3.5655412673950195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,fp8,0,1.3599467277526855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,fp8,0,2.8428214391072593
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,float16,0,1.5967307090759277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,fp8,0,1.4432053565979004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,float16,0,1.9308266639709473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,fp8,0,1.7942345937093098
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,float16,0,1.882736047108968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,float16,0,0.7821919918060303
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,fp8,0,1.8694453239440918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,fp8,0,0.7049547036488851
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,float16,0,1.3317546844482422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,float16,0,0.9719306627909342
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,fp8,0,0.9357066949208578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,fp8,0,0.7385866641998291
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,float16,0,0.8085172971089681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,fp8,0,1.4432106018066406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,float16,0,0.9397333463033041
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,float16,0,0.39667733510335285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,fp8,0,0.3642880121866862
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,float16,0,0.4142293135325114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,float16,0,2.6663734118143716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,fp8,0,0.9148800373077393
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,float16,0,0.682522694269816
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,float16,0,0.5028906663258871
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,fp8,0,0.7338986396789551
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,fp8,0,0.3893119891484578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,fp8,0,0.48074134190877277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,float16,0,0.21624533335367838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,float16,0,0.48317333062489826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,fp8,0,0.2018079956372579
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,fp8,0,0.38517332077026367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,float16,0,0.22662933667500815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,float16,0,0.35383466879526776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,fp8,0,0.21621867020924887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,fp8,0,0.2644373377164205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,float16,0,0.26473599672317505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,float16,0,0.12171733379364014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,float16,0,0.1260693371295929
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,fp8,0,0.11410133043924968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,fp8,0,0.11901332934697469
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,fp8,0,0.20882133642832437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,fp8,0,0.13550399740537009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,fp8,0,0.1378933290640513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,float16,0,0.14882666865984598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,float16,0,0.07478400071461995
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,fp8,0,0.07065066695213318
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,fp8,0,0.4989493290583293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,float16,0,0.07671999931335449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,fp8,0,0.11206400394439697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,fp8,0,0.07283733288447063
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,float16,0,0.08247466882069905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,fp8,0,0.08137066662311554
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,float16,0,0.08335999647776286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,fp8,0,0.08147733410199483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,fp8,0,0.25838400920232135
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,float16,0,1.8704640070597331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,fp8,0,1.710096041361491
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,float16,0,0.14564266800880432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,float16,0,1.9860960642496746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,float16,0,0.1206666628519694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,fp8,0,1.8438506126403809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,float16,0,2.510591983795166
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,fp8,0,2.424895922342936
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,float16,0,0.26733867327372235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,float16,0,2.4019840558369956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,float16,0,0.9620746771494547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,fp8,0,2.3124799728393555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,fp8,0,0.8725759983062744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,float16,0,1.0007466475168865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,float16,0,1.7943679491678874
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,float16,0,0.19454399744669595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,fp8,0,1.981594721476237
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,fp8,0,0.9373013178507487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,float16,0,1.2659413019816081
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,fp8,0,1.1503893534342449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,fp8,0,1.1905706723531086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,float16,0,0.4886346658070882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,fp8,0,0.4497386614481608
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,float16,0,0.5142240126927694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,float16,0,0.9079360167185465
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,fp8,0,1.0038506984710693
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,float16,0,1.2334720293680828
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,fp8,0,0.4838240146636963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,fp8,0,0.6200586557388306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,fp8,0,0.243450661500295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,float16,0,0.6420053243637085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,float16,0,0.2692906657854716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,float16,0,0.3277333378791809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,float16,0,0.46829867362976074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,fp8,0,0.5182400147120158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,fp8,0,0.32129067182540894
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,fp8,0,0.3405493497848511
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,float16,0,0.14645333091417947
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,fp8,0,0.27315733830134076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,fp8,0,0.1323360006014506
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,float16,0,0.15281599760055542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,fp8,0,0.14098133643468222
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,float16,0,0.18066134055455527
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,fp8,0,0.17526400089263916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,float16,0,0.6340106725692749
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,fp8,0,0.1774453322092692
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,float16,0,0.17715734243392944
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,float16,0,0.2599946657816569
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,fp8,0,0.6117546558380127
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,fp8,0,0.08002666632334392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,float16,0,0.08506666620572408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,fp8,0,0.2569493254025777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,fp8,0,0.08405333757400513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,float16,0,0.15285332997639975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,fp8,0,0.15179733435312906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,float16,0,0.33011200030644733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,fp8,0,0.09693866968154907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,float16,0,0.10140800476074219
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,fp8,0,0.052522664268811546
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,float16,0,0.055546666185061135
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,float16,0,0.2706186572710673
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,float16,0,0.05740800003210703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,fp8,0,0.054655998945236206
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,float16,0,0.06250666578610738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,fp8,0,0.06009600063165029
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,float16,0,0.06031466523806254
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,fp8,0,0.0614026685555776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,float16,0,0.08347200353940327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,float16,0,0.09595732887585957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,float16,0,1.9874879519144695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,fp8,0,0.09495466947555542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,fp8,0,1.8674453099568684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,float16,0,2.149343967437744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,float16,0,0.08486933509508769
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,fp8,0,0.08101866642634074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,float16,0,2.675717353820801
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,fp8,0,2.566666603088379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,float16,0,2.8725172678629556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,float16,0,1.0130933125813801
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,fp8,0,2.666543960571289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,float16,0,2.147984027862549
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,fp8,0,0.9435733159383138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,fp8,0,2.413248062133789
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,float16,0,1.3998506863911946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,float16,0,1.070682684580485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,fp8,0,1.031546672185262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,float16,0,1.3729546864827473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,fp8,0,1.3019519646962483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,fp8,0,2.0321067174275718
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,fp8,0,1.452880064646403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,float16,0,0.5128639936447144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,float16,0,1.080181360244751
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,fp8,0,0.48129065831502277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,float16,0,0.5581920146942139
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,fp8,0,0.6774720350901285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,fp8,0,0.6871893405914307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,float16,0,0.6851466496785482
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,float16,0,0.7101279894510905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,fp8,0,0.5270453294118246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,fp8,0,1.21888001759847
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,float16,0,0.5507093270619711
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,fp8,0,0.25623466571172077
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,float16,0,0.2720053394635518
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,float16,0,0.29532267649968463
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,fp8,0,0.2784053285916646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,fp8,0,0.6236426830291748
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,float16,0,0.37860798835754395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,fp8,0,0.3892853260040283
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,float16,0,0.36138665676116943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,fp8,0,0.3606613477071126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,float16,0,0.15218666195869446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,fp8,0,0.15289599696795145
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,float16,0,0.1977013349533081
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,fp8,0,0.20217067003250122
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,float16,0,0.1602079967657725
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,fp8,0,0.1425973375638326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,float16,0,0.2888053258260091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,fp8,0,0.32516799370447796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,float16,0,0.0851200024286906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,float16,0,0.08914666374524434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,fp8,0,0.07881066699822743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,fp8,0,0.2062186598777771
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,fp8,0,0.08522666494051616
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,float16,0,0.10311999917030334
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,fp8,0,0.10125866532325745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,float16,0,0.10994133353233337
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,fp8,0,0.10371733705202739
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,fp8,0,0.17385067542394003
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,fp8,0,0.04839999973773956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,float16,0,0.05187733471393585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,float16,0,0.0539680023988088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,fp8,0,0.05035200218359629
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,fp8,0,0.08975999553998311
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,float16,0,0.059418668349583946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,float16,0,0.08709333340326945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,fp8,0,0.06011733412742615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,float16,0,0.06076799829800924
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,fp8,0,0.03565866748491923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,float16,0,0.2015893260637919
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,float16,0,0.0382080003619194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,fp8,0,0.055530667304992676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,fp8,0,0.037605332831541695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,float16,0,0.04062933226426443
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,fp8,0,0.04119466741879781
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,fp8,0,0.04158399999141693
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,float16,0,0.15666666626930237
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,float16,0,1.5294666290283203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,fp8,0,0.05842666824658712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,fp8,0,1.445247968037923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,float16,0,1.670032024383545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,float16,0,0.03759466608365377
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,fp8,0,1.6144372622172039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,float16,0,0.04177066683769226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,float16,0,2.4122026761372886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,float16,0,0.7799519697825114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,float16,0,1.8955626487731934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,fp8,0,2.1952746709187827
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,fp8,0,2.154991944630941
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,fp8,0,2.2302133242289224
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,fp8,0,0.7349119981129965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,float16,0,0.04800533254941305
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,float16,0,0.8489120006561279
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,float16,0,2.2520267168680825
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,fp8,0,0.8165547053019205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,float16,0,1.1943999926249187
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,fp8,0,1.1422719955444336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,fp8,0,0.38192001978556317
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,float16,0,0.9547146956125895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,fp8,0,1.1077280044555664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,fp8,0,1.1433013280232747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,fp8,0,0.4188586473464966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,float16,0,0.43383999665578205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,float16,0,0.6163359880447388
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,float16,0,1.153498649597168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,fp8,0,0.6023360093434652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,float16,0,0.2157920002937317
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,fp8,0,0.609770655632019
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,float16,0,0.23149865865707397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,float16,0,0.4875946839650472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,fp8,0,0.5662453174591064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,fp8,0,0.22803733746210733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,float16,0,0.40163199106852215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,fp8,0,0.33187733093897503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,float16,0,0.3163413405418396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,float16,0,0.12064533432324727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,float16,0,0.3113973339398702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,fp8,0,0.3044533332188924
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,float16,0,0.12822932998339334
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,fp8,0,0.1116480032602946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,fp8,0,0.29397332668304443
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,float16,0,0.1650773286819458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,fp8,0,0.16526933511098227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,float16,0,0.17269333203633627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,fp8,0,0.16906134287516275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,float16,0,0.5719466606775919
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,fp8,0,0.15743466218312582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,float16,0,0.0676693320274353
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,fp8,0,0.06233599781990051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,float16,0,0.13910933335622153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,fp8,0,0.06865066786607106
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,float16,0,0.0890826682249705
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,fp8,0,0.08469866712888081
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,float16,0,0.07088533540566762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,fp8,0,0.20139733950297037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,float16,0,0.09120532870292664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,fp8,0,0.08640000224113464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,float16,0,0.04164266586303711
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,fp8,0,0.03995199998219808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,float16,0,0.04358933369318644
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,fp8,0,0.041738669077555336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,fp8,0,0.04956266780694326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,float16,0,0.04993600149949392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,fp8,0,0.08186666667461395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,float16,0,0.2553173303604126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,float16,0,0.04986133178075155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,fp8,0,0.12230933705965678
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,fp8,0,0.029359998802344005
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,float16,0,0.030591999491055805
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,fp8,0,0.04946133494377136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,fp8,0,0.03108799954255422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,float16,0,0.034634667138258614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,fp8,0,0.03498133271932602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,float16,0,0.0352906659245491
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,float16,0,0.027530667682488758
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,fp8,0,0.03134933362404505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,float16,0,0.021402666966120403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,fp8,0,0.021216000119845074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,fp8,0,0.02141333371400833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,float16,0,0.0775733341773351
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,float16,0,0.02345066765944163
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,fp8,0,0.023423999547958374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,fp8,0,0.0498879998922348
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,float16,0,0.023423999547958374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,fp8,0,0.023365333676338196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,float16,0,0.03153600047032038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,fp8,0,0.03565333286921183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,fp8,0,0.6111786762873331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,float16,0,0.6356213490168253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,float16,0,0.7152319749196371
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,fp8,0,0.6988853613535563
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,float16,0,0.021354667842388153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,float16,0,1.039072036743164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,fp8,0,1.0346613725026448
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,float16,0,0.3275093237559001
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,fp8,0,1.0442826747894287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,float16,0,0.8808106581370035
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,float16,0,1.0730133056640625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,fp8,0,0.3144266605377197
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,float16,0,0.04137066751718521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,fp8,0,0.3638559977213542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,float16,0,0.3644373416900635
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,float16,0,0.5230933427810669
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,float16,0,0.17900800704956055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,fp8,0,0.5256799856821696
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,float16,0,0.46463465690612793
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,fp8,0,0.518447995185852
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,float16,0,0.5421280066172282
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,fp8,0,0.5323413213094076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,float16,0,0.1974773406982422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,fp8,0,0.19799999396006265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,fp8,0,0.1702186663945516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,float16,0,0.286789337793986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,float16,0,0.2648800015449524
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,fp8,0,0.299072007338206
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,fp8,0,1.0222400029500325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,fp8,0,0.27534933884938556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,float16,0,0.09818666179974873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,float16,0,0.2342080076535543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,fp8,0,0.10347732901573181
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,float16,0,0.10657067100207011
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,fp8,0,0.2760000030199687
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,fp8,0,0.14799466729164124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,float16,0,0.05619200070699056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,fp8,0,0.14749333262443542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,float16,0,0.14497066537539163
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,fp8,0,0.053786665201187134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,float16,0,0.06027733286221822
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,fp8,0,0.07644799848397572
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,float16,0,0.12847999731699625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,fp8,0,0.14816000064214072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,float16,0,0.0775733341773351
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,float16,0,0.08285866677761078
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,fp8,0,0.07541333138942719
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,float16,0,0.03754666695992152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,fp8,0,0.035504000882307686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,float16,0,0.03783999880154928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,fp8,0,0.037962667644023895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,float16,0,0.07999466856320699
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,fp8,0,0.07613333563009898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,float16,0,0.0439573327700297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,float16,0,0.04443199932575226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,fp8,0,0.04584000011285146
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,fp8,0,0.09331199526786804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,float16,0,0.02700799951950709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,fp8,0,0.02606400102376938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,float16,0,0.0273333340883255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,fp8,0,0.027477333943049114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,float16,0,0.029509333272775013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,fp8,0,0.04620266457398733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,fp8,0,0.03146133323510488
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,float16,0,0.14600533246994019
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,fp8,0,0.031770666440327965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,float16,0,0.030586667358875275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,float16,0,0.017290666699409485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,float16,0,0.018058666338523228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,float16,0,0.023344000180562336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,fp8,0,0.0581226646900177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,fp8,0,0.017738666385412216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,fp8,0,0.028064000109831493
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,float16,0,0.01924266666173935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,fp8,0,0.02139200021823247
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,float16,0,0.01932799940307935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,fp8,0,0.0458186666170756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,float16,0,0.017407999684413273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,float16,0,0.017514667163292568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,float16,0,0.01931200052301089
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,fp8,0,0.021781332790851593
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,float16,0,0.01748266691962878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,fp8,0,0.01756799966096878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,float16,0,0.017237332959969837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,fp8,0,0.01781333362062772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,float16,0,0.03755733370780945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,fp8,0,0.01899733394384384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,float16,0,0.3874400059382121
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,fp8,0,0.3678239981333415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,float16,0,0.42742399374643963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,fp8,0,0.40988266468048096
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,fp8,0,0.01939733326435089
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,float16,0,0.57805867989858
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,float16,0,0.20518400271733603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,fp8,0,0.19703465700149536
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,float16,0,0.22770132621129355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,fp8,0,0.5779360135396322
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,fp8,0,0.5855253140131632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,float16,0,0.574293335278829
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,fp8,0,0.5540693203608195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,fp8,0,0.32386666536331177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,float16,0,0.2966933250427246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,float16,0,0.29233066240946454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,fp8,0,0.2967200080553691
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,float16,0,0.11218667030334473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,fp8,0,0.28757866223653156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,fp8,0,0.1058026651541392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,float16,0,0.12140267093976338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,float16,0,0.1623253325621287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,fp8,0,0.116949329773585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,float16,0,0.24686400095621744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,fp8,0,0.16506666938463846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,float16,0,0.15994133551915488
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,fp8,0,0.16241066654523215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,float16,0,0.06230400005976359
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,fp8,0,0.05624533196290334
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,fp8,0,0.15061333775520325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,float16,0,0.47571734587351483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,float16,0,0.1309333344300588
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,fp8,0,0.07864533364772797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,float16,0,0.08506133159001668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,float16,0,0.06644799808661143
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,fp8,0,0.2172586719195048
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,fp8,0,0.07845866680145264
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,float16,0,0.03630933413902918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,float16,0,0.03793599953254064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,float16,0,0.07321600119272868
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,fp8,0,0.037434667348861694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,float16,0,0.04378133515516917
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,fp8,0,0.07787733276685078
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,fp8,0,0.04585599899291992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,float16,0,0.04379733403523763
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,float16,0,0.02535466601451238
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,fp8,0,0.023168000082174938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,float16,0,0.025285333395004272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,float16,0,0.03554133325815201
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,float16,0,0.027632000545660656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,fp8,0,0.02959466725587845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,float16,0,0.028031999866167705
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,fp8,0,0.02959999938805898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,fp8,0,0.062309334675470986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,float16,0,0.019391999890406925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,float16,0,0.08506666620572408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,float16,0,0.023215999205907185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,float16,0,0.01921066641807556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,fp8,0,0.018954666952292126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,fp8,0,0.04572799801826477
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,float16,0,0.01933866615096728
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,fp8,0,0.021317332983016968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,fp8,0,0.021146667500336964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,float16,0,0.013408000270525614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,fp8,0,0.01332266628742218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,fp8,0,0.02571733295917511
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,float16,0,0.017136000096797943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,float16,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,fp8,0,0.04403733213742574
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,fp8,0,0.015087999403476715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,float16,0,0.015029333531856537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,fp8,0,0.01524266724785169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,float16,0,0.01504533365368843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,float16,0,0.013269333789745966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,float16,0,0.013269333789745966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,float16,0,0.015178666760524115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,float16,0,0.013242666920026144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,fp8,0,0.0144213338692983
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,fp8,0,0.03568000098069509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,fp8,0,0.014688000082969666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,fp8,0,0.02762666592995326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,float16,0,0.01313599944114685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,float16,0,0.02102400114138921
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,float16,0,0.3038133382797241
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,fp8,0,0.27832533915837604
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,float16,0,0.32710399230321247
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,fp8,0,0.30479466915130615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,float16,0,0.4003680149714152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,fp8,0,0.013290667285521826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,fp8,0,0.3846293290456136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,float16,0,0.40092798074086505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,float16,0,0.16343466440836588
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,fp8,0,0.3824853499730428
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,float16,0,0.3142293294270833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,fp8,0,0.14881599942843118
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,fp8,0,0.3309440016746521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,float16,0,0.1720693310101827
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,float16,0,0.20849599440892538
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,fp8,0,0.15972266594568887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,fp8,0,0.2119200030962626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,fp8,0,0.20667733748753866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,fp8,0,0.013237333546082178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,float16,0,0.08869333068529765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,fp8,0,0.07936533292134602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,fp8,0,0.17201600472132364
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,float16,0,0.2084640065828959
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,fp8,0,0.10133333007494609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,fp8,0,0.08506666620572408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,float16,0,0.09393067161242168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,float16,0,0.11190399527549744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,float16,0,0.15594133734703064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,float16,0,0.11338667074839275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,fp8,0,0.10245333115259807
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,float16,0,0.04978133241335551
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,fp8,0,0.04433600107828776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,fp8,0,0.08666666348775227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,float16,0,0.05130666494369507
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,float16,0,0.05608533322811127
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,fp8,0,0.055029332637786865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,fp8,0,0.05593599875768026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,float16,0,0.0580213318268458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,float16,0,0.08382933338483174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,fp8,0,0.0479360024134318
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,float16,0,0.02951466788848241
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,float16,0,0.03142400085926056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,fp8,0,0.02770666778087616
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,fp8,0,0.02961066613594691
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,fp8,0,0.04806933303674062
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,fp8,0,0.03387200087308884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,fp8,0,0.03338133295377096
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,float16,0,0.04065600037574768
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,fp8,0,0.020245333512624104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,float16,0,0.021066665649414062
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,float16,0,0.021162666380405426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,fp8,0,0.019120000302791595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,fp8,0,0.022869333624839783
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,fp8,0,0.02204799900452296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,float16,0,0.02256533255179723
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,float16,0,0.023237332701683044
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,float16,0,0.01632533346613248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,float16,0,0.01710933322707812
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,fp8,0,0.01580799991885821
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,float16,0,0.019007999449968338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,float16,0,0.017263999829689663
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,fp8,0,0.01682666689157486
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,float16,0,0.01706133286158244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,fp8,0,0.018031999468803406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,fp8,0,0.015365333606799444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,float16,0,0.013738666971524557
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,float16,0,0.03454400102297465
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,fp8,0,0.013317332913478216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,float16,0,0.01302933320403099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,float16,0,0.03379733363787333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,float16,0,0.013130666067202887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,float16,0,0.013221333424250284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,float16,0,0.02606933315594991
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,float16,0,0.012954667210578918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,float16,0,0.015018666783968607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,fp8,0,0.01309866706530253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,fp8,0,0.020874666670958202
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,fp8,0,0.2420533299446106
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,float16,0,0.2793920040130615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,float16,0,0.2935306628545125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,fp8,0,0.030042665700117748
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,fp8,0,0.2519200046857198
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,float16,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,float16,0,0.3245226740837097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,fp8,0,0.2927146752675374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,float16,0,0.3338826497395833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,fp8,0,0.013194666554530462
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,float16,0,0.14774399995803833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,fp8,0,0.12490133444468181
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,fp8,0,0.30649600426356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,fp8,0,0.1306666632493337
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,float16,0,0.15627732872962952
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,fp8,0,0.21751999855041504
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,float16,0,0.17198399702707926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,fp8,0,0.14360533157984415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,fp8,0,0.15260266264279684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,float16,0,0.12142399946848552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,float16,0,0.0804906686147054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,fp8,0,0.11376532912254333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,float16,0,0.08074133098125458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,float16,0,0.08772800366083781
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,fp8,0,0.07044800122578938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,float16,0,0.08738133311271667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,fp8,0,0.07904000083605449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,float16,0,0.04596266647179922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,float16,0,0.2281066576639811
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,float16,0,0.061674664417902626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,float16,0,0.04631466666857401
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,fp8,0,0.058746665716171265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,fp8,0,0.03967999915281931
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,float16,0,0.04879466692606608
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,fp8,0,0.040778666734695435
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,fp8,0,0.04400533437728882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,float16,0,0.04991999765237173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,fp8,0,0.04399466514587402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,float16,0,0.02757866680622101
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,fp8,0,0.06754666566848755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,float16,0,0.027888000011444092
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,fp8,0,0.02533866713444392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,fp8,0,0.03562666724125544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,float16,0,0.03322133421897888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,fp8,0,0.025413334369659424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,float16,0,0.02938666691382726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,fp8,0,0.027482666075229645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,float16,0,0.029765332738558452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,fp8,0,0.02749866743882497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,fp8,0,0.019354666272799175
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,fp8,0,0.01907733331123988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,float16,0,0.019317333896954853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,fp8,0,0.07825600107510884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,fp8,0,0.023578666150569916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,fp8,0,0.019306667149066925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,float16,0,0.021429332594076794
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,fp8,0,0.019226666539907455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,float16,0,0.019424000134070713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,float16,0,0.015034666905800501
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,fp8,0,0.015279999623696009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,float16,0,0.015301333119471868
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,fp8,0,0.01921066641807556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,float16,0,0.01516266663869222
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,fp8,0,0.015381333728631338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,float16,0,0.017269333203633625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,float16,0,0.17285333077112833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,float16,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,fp8,0,0.015274666249752045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,float16,0,0.01320533330241839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,float16,0,0.013354666531085968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,float16,0,0.012954667210578918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,float16,0,0.013162666310866674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,float16,0,0.019226666539907455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,float16,0,0.011258666714032492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,float16,0,0.019466667125622433
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,float16,0,0.013338666409254074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,float16,0,0.01101333275437355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,fp8,0,0.012703999876976013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,float16,0,0.011183999478816986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,float16,0,0.01522133375207583
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,fp8,0,0.21702400843302408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,float16,0,0.26834134260813397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,float16,0,0.2778559923171997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,fp8,0,0.012634667257467905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,fp8,0,0.22380799055099487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,float16,0,0.29477866490681964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,float16,0,0.2935573259989421
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,fp8,0,0.24402666091918945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,fp8,0,0.2502239942550659
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,float16,0,0.17745065689086914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,fp8,0,0.16140266259511313
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,float16,0,0.14131200313568115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,fp8,0,0.11434666315714519
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,float16,0,0.14356266458829245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,fp8,0,0.11585066715876262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,fp8,0,0.12484266360600789
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,float16,0,0.151119997104009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,float16,0,0.15333333611488342
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,fp8,0,0.12495467066764832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,float16,0,0.10015466809272766
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,float16,0,0.07670400043328603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,fp8,0,0.06282133360703786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,fp8,0,0.06443733473618825
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,fp8,0,0.08372267087300618
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,fp8,0,0.06820799907048543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,float16,0,0.07986666758855183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,float16,0,0.011285333583752314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,float16,0,0.08103999992211659
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,fp8,0,0.06855999926726024
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,float16,0,0.043696001172065735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,fp8,0,0.03756800045569738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,float16,0,0.04377066592375437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,fp8,0,0.04769066472848257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,float16,0,0.044693330923716225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,fp8,0,0.03793066740036011
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,float16,0,0.04986133178075155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,fp8,0,0.03959999978542328
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,float16,0,0.045781334241231285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,float16,0,0.027295999228954315
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,fp8,0,0.024293333292007446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,float16,0,0.02738133321205775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,fp8,0,0.0249439999461174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,float16,0,0.02739199995994568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,fp8,0,0.028991999725500744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,fp8,0,0.024149333437283833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,fp8,0,0.025397333006064098
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,float16,0,0.02741333345572154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,float16,0,0.01912533367673556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,fp8,0,0.018165333817402523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,float16,0,0.019253333409627277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,fp8,0,0.01802666609485944
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,float16,0,0.019178666174411774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,float16,0,0.01923199991385142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,float16,0,0.0798826664686203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,fp8,0,0.01741333305835724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,float16,0,0.01509333277742068
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,fp8,0,0.015072000523408255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,float16,0,0.015130666395028433
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,fp8,0,0.017509333789348602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,fp8,0,0.015210667004187902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,float16,0,0.015125333021084467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,float16,0,0.015370666980743408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,fp8,0,0.039701332648595176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,fp8,0,0.01515199989080429
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,float16,0,0.01309866706530253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,float16,0,0.029370665550231934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,float16,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,fp8,0,0.013194666554530462
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,float16,0,0.01309866706530253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,float16,0,0.021082667013009388
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,fp8,0,0.013157332936922709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,float16,0,0.012896000097195307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,float16,0,0.014346666634082794
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,float16,0,0.017221332838137943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,float16,0,0.011877333124478659
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,fp8,0,0.011328000575304031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,float16,0,0.011333333949247995
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,fp8,0,0.011674666156371435
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,float16,0,0.012885333349307379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,fp8,0,0.02147199958562851
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,float16,0,0.012383999923865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,0,0.023562667270501454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,fp8,0,0.011306667079528173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,0,0.02939733366171519
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,float16,0,0.0703359991312027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,fp8,0,0.055914665261904396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,float16,0,0.013045333325862885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,fp8,0,0.017338667064905167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,0,0.016303999970356624
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,0,0.01918399954835574
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,0,0.01741333305835724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,float16,0,0.037418665985266365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,fp8,0,0.03245333333810171
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,0,0.01331199953953425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,0,0.011173332730929056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,0,0.019578666736682255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,0,0.015024000157912573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,0,0.011034666250149408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,0,0.025754667818546295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,fp8,0,0.021514666577180225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,float16,0,0.0236160010099411
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,float16,0,0.01714666684468587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,fp8,0,0.01732800031701724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,0,0.01145600030819575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,0,0.009381333366036415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,0,0.01101333275437355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,float16,0,0.013194666554530462
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,0,0.00921066664159298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,0,0.01328533391157786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,0,0.009312000125646591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,0,0.009093333035707474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,0,0.011178666104873022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,float16,0,0.013237333546082178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,0,0.009002666920423508
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,float16,0,0.011312000453472137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,0,0.00916800027092298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,0,0.009328000247478485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,0,0.011338666081428528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,0,0.009338666374484697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,0,0.008853333070874214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,fp8,0,11.450448354085287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,float16,0,0.010922666639089584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,fp8,0,11.344324747721354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,float16,0,15.553114573160807
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,float16,0,15.7631467183431
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,float16,0,15.976351420084635
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,fp8,0,12.042250315348307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,fp8,0,6.707189559936523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,float16,0,7.848416010538737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,float16,0,7.5284372965494795
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,fp8,0,6.269919713338216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,fp8,0,12.108277638753256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,float16,0,15.96551513671875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,fp8,0,5.67738151550293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,float16,0,7.46884282430013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,fp8,0,6.598288218180339
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,float16,0,7.270575841267903
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,fp8,0,6.007408142089844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,float16,0,3.6294987996419272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,fp8,0,4.076954523722331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,float16,0,7.6836802164713545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,fp8,0,3.656229337056478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,float16,0,3.532832145690918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,fp8,0,3.501354535420736
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,float16,0,3.736805280049642
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,fp8,0,3.7402451833089194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,float16,0,3.6809972127278647
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,fp8,0,3.3370186487833657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,float16,0,2.032320022583008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,float16,0,1.777941385904948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,fp8,0,1.8951306343078613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,fp8,0,2.054080009460449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,fp8,0,1.804730733235677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,float16,0,4.082266807556152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,float16,0,1.9237066904703777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,fp8,0,1.6800319353739421
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,float16,0,1.8710026741027832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,fp8,0,1.7869493166605632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,float16,0,1.8209120432535808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,fp8,0,6.404944101969401
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,fp8,0,6.568880081176758
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,float16,0,8.333114624023438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,float16,0,8.849871953328451
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,float16,0,8.98515764872233
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,fp8,0,6.906005223592122
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,float16,0,3.993610699971517
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,float16,0,4.65447457631429
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,fp8,0,3.774282773335775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,float16,0,4.060031890869141
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,fp8,0,4.373562812805176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,fp8,0,6.970117568969727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,fp8,0,3.286975860595703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,float16,0,9.109631856282553
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,float16,0,2.0179360707600913
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,fp8,0,2.1090826988220215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,float16,0,4.330128033955892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,fp8,0,2.1371466318766275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,fp8,0,3.8077920277913413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,float16,0,2.340208053588867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,float16,0,4.22215461730957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,fp8,0,4.273680051167806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,float16,0,2.0448106129964194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,fp8,0,1.6971519788106282
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,float16,0,2.2621973355611167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,fp8,0,1.9569600423177083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,fp8,0,1.868949254353841
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,float16,0,1.1459999879201253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,float16,0,2.192431926727295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,fp8,0,0.879642645517985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,fp8,0,1.10917329788208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,float16,0,1.2671840190887451
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,fp8,0,0.9507466952006022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,float16,0,1.0979359944661458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,float16,0,1.1132373015085857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,fp8,0,1.0791040261586506
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,fp8,0,0.9785652955373129
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,float16,0,1.1421013673146565
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,fp8,0,4.538266817728679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,fp8,0,4.660149256388347
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,float16,0,5.81056022644043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,float16,0,5.9862931569417315
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,fp8,0,4.973349253336589
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,fp8,0,5.061749458312988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,float16,0,6.387664159138997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,float16,0,6.4483998616536455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,float16,0,3.24560546875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,fp8,0,3.028106689453125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,fp8,0,2.3595253626505532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,fp8,0,3.0522454579671225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,float16,0,2.8522186279296875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,float16,0,3.3183838526407876
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,float16,0,3.099285443623861
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,float16,0,1.451317310333252
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,fp8,0,3.1704479853312173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,fp8,0,1.3161760171254475
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,float16,0,1.4832533200581868
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,float16,0,3.274314562479655
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,fp8,0,1.560640017191569
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,float16,0,1.7144586245218914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,fp8,0,2.6566346486409507
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,fp8,0,1.3972105979919434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,float16,0,1.5899200439453125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,fp8,0,1.4037493069966633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,float16,0,0.75654403368632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,float16,0,1.5514987309773762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,fp8,0,0.7906560103098551
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,fp8,0,1.5352427164713542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,fp8,0,0.8260746796925863
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,float16,0,0.7863146464029948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,fp8,0,0.6659893194834391
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,float16,0,0.8929706414540609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,fp8,0,0.8334879875183105
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,float16,0,0.8090720176696777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,fp8,0,0.794047991434733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,float16,0,0.8246826330820719
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,fp8,0,6.055797576904297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,fp8,0,6.0607147216796875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,float16,0,7.56394640604655
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,float16,0,8.181034723917643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,fp8,0,6.885567982991536
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,fp8,0,6.692368189493815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,float16,0,8.604730606079102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,float16,0,8.49563217163086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,float16,0,3.674762725830078
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,fp8,0,4.169317245483398
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,fp8,0,3.0652907689412436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,fp8,0,3.9049173990885415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,float16,0,4.5639041264851885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,float16,0,4.219514528910319
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,fp8,0,3.968015988667806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,float16,0,1.8696479797363281
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,float16,0,2.2780213356018066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,fp8,0,2.1835625966389975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,fp8,0,1.695807933807373
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,fp8,0,3.4978933334350586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,float16,0,4.0803572336832685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,fp8,0,1.6287627220153809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,fp8,0,1.909989356994629
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,float16,0,2.0441226959228516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,fp8,0,1.8668212890625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,float16,0,2.0593973795572915
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,float16,0,3.796757380167643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,float16,0,1.1739839712778728
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,float16,0,0.952618678410848
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,fp8,0,0.8615146478017172
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,fp8,0,0.8217600186665853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,float16,0,0.9813600381215414
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,fp8,0,1.1033600171407063
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,float16,0,1.0605279604593914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,fp8,0,0.9109813372294108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,fp8,0,0.9195893605550131
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,float16,0,1.0523467063903809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,fp8,0,0.441269318262736
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,float16,0,0.5087946653366089
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,float16,0,0.6153653462727865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,float16,0,0.515066663424174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,fp8,0,0.44828800360361737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,float16,0,1.9045173327128093
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,fp8,0,0.5848053296407064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,fp8,0,0.49504534403483075
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,float16,0,0.5507200161616007
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,float16,0,0.559008002281189
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,fp8,0,0.5054506858189901
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,fp8,0,3.475914637247721
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,float16,0,4.239263852437337
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,float16,0,4.516357421875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,fp8,0,3.5977865854899087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,fp8,0,4.0917174021403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,float16,0,4.872111956278483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,float16,0,4.860613187154134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,float16,0,2.149797280629476
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,fp8,0,1.7642347017923992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,fp8,0,4.086661338806152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,float16,0,2.2859999338785806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,float16,0,2.8067680994669595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,fp8,0,1.976378599802653
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,fp8,0,2.9810508092244468
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,fp8,0,2.294703960418701
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,float16,0,2.406533400217692
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,float16,0,1.0980160236358643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,float16,0,2.4464054107666016
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,fp8,0,2.080538590749105
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,fp8,0,1.420021375020345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,float16,0,1.1309706370035808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,fp8,0,0.9512639840443929
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,float16,0,1.4197120666503906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,float16,0,1.2328746318817139
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,float16,0,1.2256906827290852
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,float16,0,0.5658133427302042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,fp8,0,1.1268959840138753
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,fp8,0,0.492789347966512
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,float16,0,0.7276586691538492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,fp8,0,0.7074560324350992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,float16,0,0.5831253528594971
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,fp8,0,0.5142613252003988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,float16,0,0.6391306718190511
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,fp8,0,0.5732959906260172
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,float16,0,0.30823467175165814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,fp8,0,0.5727306604385376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,fp8,0,0.2717760006586711
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,float16,0,0.38842666149139404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,fp8,0,0.3866986831029256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,float16,0,0.3150186737378438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,fp8,0,0.280623992284139
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,float16,0,0.34310932954152423
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,fp8,0,0.31058667103449505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,float16,0,0.34242133299509686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,fp8,0,0.3163040081659953
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,fp8,0,1.0214933554331462
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,fp8,0,1.1491519610087078
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,fp8,0,3.3105812072753906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,float16,0,3.9486878712972007
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,fp8,0,3.4906078974405923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,float16,0,4.314650535583496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,fp8,0,4.345199902852376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,float16,0,0.6364746491114298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,float16,0,4.93234125773112
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,float16,0,2.257263978322347
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,fp8,0,1.6810293197631836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,float16,0,4.779882748921712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,fp8,0,2.024991989135742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,fp8,0,2.857365290323893
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,float16,0,2.8911520640055337
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,fp8,0,4.132810592651367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,fp8,0,2.2224319775899253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,float16,0,2.569983959197998
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,float16,0,2.626848061879476
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,fp8,0,2.2377492586771646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,float16,0,1.0240533351898193
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,fp8,0,0.8616639773050944
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,float16,0,1.0800426801045735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,fp8,0,1.4674720764160156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,float16,0,1.444533348083496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,fp8,0,0.9805013338724772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,float16,0,1.2122613588968914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,fp8,0,1.0604053338368733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,float16,0,1.224608023961385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,fp8,0,1.148703972498576
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,float16,0,0.5237066745758057
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,float16,0,0.7323093414306641
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,fp8,0,0.45281068483988446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,fp8,0,0.7619199752807617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,fp8,0,0.4958719809850057
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,float16,0,2.095989386240641
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,float16,0,0.6282133261362711
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,fp8,0,0.5891520182291666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,float16,0,0.6112746795018514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,fp8,0,0.5671253204345703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,float16,0,0.28142400582631427
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,fp8,0,0.24962133169174194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,fp8,0,0.3966826597849528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,float16,0,0.2890773415565491
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,fp8,0,0.2638933261235555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,float16,0,0.32781867186228436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,fp8,0,0.3099946578343709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,float16,0,0.3326080044110616
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,float16,0,0.15812266866366068
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,fp8,0,0.1481119990348816
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,float16,0,0.16406400005022684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,float16,0,0.21570134162902832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,fp8,0,0.15339733163515726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,float16,0,0.17349867026011148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,fp8,0,0.1699519952138265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,fp8,0,0.17274133364359537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,float16,0,0.5377973318099976
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,float16,0,2.4217333793640137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,float16,0,2.4633545875549316
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,fp8,0,2.00217072168986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,fp8,0,0.3165280024210612
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,fp8,0,2.147578716278076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,fp8,0,0.22259199619293213
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,fp8,0,2.883381207784017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,float16,0,3.0266294479370117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,float16,0,0.18145066499710083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,float16,0,2.9579413731892905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,fp8,0,2.6432320276896157
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,float16,0,1.8427306811014812
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,float16,0,1.1913973490397136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,fp8,0,1.0359679857889812
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,fp8,0,1.908506711324056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,float16,0,1.2730186780293782
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,float16,0,1.4770240783691406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,fp8,0,1.1576586564381917
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,fp8,0,1.431290626525879
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,float16,0,0.39297600587209064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,float16,0,0.6159626642862955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,fp8,0,1.335466702779134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,fp8,0,0.5425333182017008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,float16,0,0.6366933186848959
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,fp8,0,0.5663146575291952
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,fp8,0,0.71616530418396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,fp8,0,0.967146635055542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,float16,0,0.9376906553904215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,fp8,0,0.7349332968393961
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,float16,0,0.7574346860249838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,float16,0,0.7397279739379883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,fp8,0,0.2879146734873454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,float16,0,0.3333439826965332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,float16,0,0.3221973379453023
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,float16,0,0.39853334426879883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,fp8,0,0.5032960176467896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,fp8,0,0.3739733298619588
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,float16,0,0.3895893494288127
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,fp8,0,0.3712533315022786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,float16,0,0.1760960022608439
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,fp8,0,0.16119466225306192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,float16,0,0.18386665980021158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,fp8,0,0.16755733887354532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,fp8,0,0.27063467105229694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,float16,0,0.21465599536895752
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,fp8,0,0.20961600542068481
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,float16,0,0.21262933810551962
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,float16,0,1.489024003346761
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,float16,0,0.10629866520563762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,float16,0,0.15916267037391663
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,float16,0,0.10770133137702942
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,fp8,0,0.09946667154630025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,fp8,0,0.15227733055750528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,fp8,0,0.10269332925478618
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,fp8,0,0.11397866408030193
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,float16,0,0.1164959967136383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,float16,0,0.11699733138084412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,fp8,0,0.11574400464693706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,fp8,0,0.3056480089823405
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,float16,0,0.48093334833780926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,fp8,0,2.0033814112345376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,float16,0,2.3361706733703613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,fp8,0,0.19909334182739258
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,float16,0,2.5283946990966797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,fp8,0,2.1797173817952475
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,float16,0,3.1332693099975586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,fp8,0,3.0520960489908853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,fp8,0,1.0120533307393391
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,float16,0,1.186021327972412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,float16,0,2.0566399892171225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,float16,0,1.2660799821217854
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,fp8,0,2.207856019337972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,fp8,0,2.8126452763875327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,float16,0,3.0645440419514975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,fp8,0,1.1033066908518474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,float16,0,0.26497066020965576
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,float16,0,1.597424030303955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,fp8,0,1.4366240501403809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,fp8,0,1.4494560559590657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,float16,0,1.626805305480957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,float16,0,0.5995306571324667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,float16,0,1.0432373682657878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,fp8,0,0.5268640120824178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,float16,0,0.6342720190684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,fp8,0,1.1086773077646892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,fp8,0,0.5754453341166178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,fp8,0,0.7230613231658936
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,float16,0,0.30824534098307294
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,float16,0,0.8232159614562988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,fp8,0,0.7437600294748942
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,fp8,0,0.28101332982381183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,fp8,0,0.5694080193837484
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,float16,0,0.5247040192286173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,float16,0,0.3263733386993408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,float16,0,0.4159359931945801
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,float16,0,0.3964266777038574
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,fp8,0,0.3991200129191081
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,float16,0,0.17267733812332153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,fp8,0,0.15597866972287497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,fp8,0,0.2995520035425822
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,float16,0,0.2752053340276082
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,float16,0,0.178874671459198
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,fp8,0,0.16546666622161865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,float16,0,0.2182240088780721
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,fp8,0,0.21028266350428262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,float16,0,0.22234666347503662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,fp8,0,0.2181439995765686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,float16,0,0.0981119970480601
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,float16,0,0.797269344329834
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,fp8,0,0.09115733702977498
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,fp8,0,0.09748799602190654
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,float16,0,0.10257066289583842
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,fp8,0,0.16581867138544717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,float16,0,0.1537813345591227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,fp8,0,0.11267733573913574
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,float16,0,0.11779733498891194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,fp8,0,0.11428266763687134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,float16,0,0.06328533093134563
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,fp8,0,0.08657067020734151
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,fp8,0,0.05881066620349884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,float16,0,0.06497600177923839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,fp8,0,0.30267733335494995
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,fp8,0,0.41353599230448407
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,float16,0,0.07165333131949107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,float16,0,0.09526399771372478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,fp8,0,0.06841599941253662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,float16,0,0.07167999943097432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,fp8,0,0.07062933345635732
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,float16,0,1.4615200360616047
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,fp8,0,1.2678399880727131
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,float16,0,1.5750932693481445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,float16,0,2.0586986541748047
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,fp8,0,1.3985172907511394
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,fp8,0,1.8329226175944011
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,float16,0,0.1157973309357961
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,fp8,0,0.6483626763025919
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,float16,0,1.998949368794759
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,float16,0,0.7393866380055746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,fp8,0,0.06057066718737284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,fp8,0,1.8869387308756511
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,float16,0,0.7968800067901611
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,fp8,0,1.5226453145345051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,fp8,0,0.7110773722330729
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,float16,0,1.0527626673380535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,float16,0,1.386954625447591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,fp8,0,0.9254826704661051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,float16,0,1.0036853154500325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,fp8,0,0.34219733874003094
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,float16,0,0.3810933430989583
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,fp8,0,0.377621332804362
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,float16,0,0.40534400939941406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,float16,0,0.5249866644541422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,fp8,0,0.516325314839681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,fp8,0,0.7732000350952148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,float16,0,0.6992800235748291
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,float16,0,0.5427680015563965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,float16,0,0.2006666660308838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,fp8,0,0.5220959981282552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,fp8,0,0.18841065963109335
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,float16,0,0.21979200839996338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,fp8,0,0.20399999618530273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,float16,0,0.27847466866175336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,fp8,0,0.3997066815694173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,fp8,0,0.2826613386472066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,fp8,0,0.27210666735967
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,float16,0,0.3593386809031169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,float16,0,0.11296000083287557
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,float16,0,0.12086400389671326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,fp8,0,0.1035040020942688
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,fp8,0,0.11089600125948589
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,float16,0,0.15242666999499002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,float16,0,0.19284266233444214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,fp8,0,0.1422719955444336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,fp8,0,0.9615360101064047
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,fp8,0,0.1509119967619578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,float16,0,0.15405866503715515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,fp8,0,0.06422933439413707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,float16,0,0.11002666751543681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,float16,0,0.07091199855009715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,fp8,0,0.11786133050918579
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,fp8,0,0.06701866785685222
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,float16,0,0.0798933357000351
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,fp8,0,0.07859200239181519
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,float16,0,0.08071466783682506
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,fp8,0,0.08080000181992848
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,float16,0,0.046122665206591286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,fp8,0,0.04297600189844767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,float16,0,0.04769066472848257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,float16,0,0.05933333436648051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,fp8,0,0.0641546646753947
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,fp8,0,0.04404800136884054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,float16,0,0.05199466645717621
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,float16,0,0.05205333232879639
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,fp8,0,0.05106666684150696
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,float16,0,0.2733653386433919
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,fp8,0,0.21283199389775595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,float16,0,1.5539894104003906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,float16,0,0.06923200190067291
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,fp8,0,1.347856044769287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,float16,0,1.692581335703532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,fp8,0,1.520037333170573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,float16,0,2.3753013610839844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,fp8,0,0.05215999980767568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,float16,0,0.7867626349131266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,float16,0,2.217535972595215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,fp8,0,2.1496960322062173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,fp8,0,2.400335947672526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,fp8,0,1.84880526860555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,float16,0,0.8340480327606201
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,fp8,0,0.7710613409678141
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,fp8,0,0.6906293233235677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,float16,0,0.3951306740442912
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,fp8,0,1.0940319697062175
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,float16,0,1.1589173475901287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,float16,0,1.2102239926656086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,fp8,0,0.36025599638621014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,fp8,0,0.9328853289286295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,float16,0,0.8196746508280436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,fp8,0,0.401962677637736
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,float16,0,0.43905067443847656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,fp8,0,0.5504106680552164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,float16,0,0.5841546853383383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,float16,0,0.2130720019340515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,fp8,0,0.6213866472244263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,fp8,0,0.19184533754984537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,float16,0,1.6421972910563152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,float16,0,0.23034665981928507
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,float16,0,0.4203733205795288
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,fp8,0,0.21199466784795126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,fp8,0,0.47630401452382404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,float16,0,0.3149226705233256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,float16,0,0.119077334801356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,fp8,0,0.32544533411661786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,fp8,0,0.29721599817276
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,fp8,0,1.0884533723195393
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,fp8,0,0.24913599093755087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,float16,0,0.22179200251897177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,float16,0,0.12729600071907043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,float16,0,0.16345066825548807
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,fp8,0,0.1672746737798055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,float16,0,0.1673120061556498
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,fp8,0,0.16035733620325723
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,float16,0,0.5827946662902832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,float16,0,0.06742933392524719
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,fp8,0,0.061706667145093284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,fp8,0,0.06706133484840393
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,float16,0,0.12112533052762349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,float16,0,0.08276266853014629
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,fp8,0,0.08294933537642162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,float16,0,0.09142933289210002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,fp8,0,0.08567466338475545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,float16,0,0.04401599864164988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,fp8,0,0.039893334110577904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,fp8,0,0.0682239979505539
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,fp8,0,0.04164800047874451
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,float16,0,0.04531733194986979
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,float16,0,0.05004266897837321
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,fp8,0,0.050586665670077004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,fp8,0,0.10534399747848511
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,float16,0,0.05075199902057648
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,fp8,0,0.05218133330345154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,fp8,0,0.12277332941691081
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,float16,0,0.02758399893840154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,fp8,0,0.025461333493391674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,float16,0,0.028373333315054577
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,fp8,0,0.03973866750796636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,float16,0,0.035631999373435974
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,fp8,0,0.02741866558790207
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,fp8,0,0.03142400085926056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,float16,0,0.03165333221356074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,float16,0,0.03142933299144109
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,fp8,0,0.03150933235883713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,fp8,0,0.1357439955075582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,float16,0,0.0708426684141159
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,float16,0,0.31405333677927655
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,float16,0,1.1626880168914795
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,fp8,0,1.0196426709493
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,float16,0,1.2949706713358562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,fp8,0,1.1947733561197917
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,float16,0,2.0404319763183594
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,fp8,0,1.9902079900105794
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,float16,0,1.433194637298584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,float16,0,0.06774400174617767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,float16,0,0.5922133525212606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,fp8,0,1.6703039805094402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,fp8,0,1.8119254112243652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,float16,0,1.8447945912679036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,fp8,0,0.6050986846288046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,float16,0,0.6511733531951904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,fp8,0,0.5254133145014445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,float16,0,1.01528000831604
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,fp8,0,0.27910399436950684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,fp8,0,0.9187413056691488
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,float16,0,0.3041599988937378
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,float16,0,0.7255787054697672
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,float16,0,0.35067200660705566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,fp8,0,0.3224266568819682
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,fp8,0,0.8446186383565267
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,float16,0,1.0270559787750244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,float16,0,0.48866132895151776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,fp8,0,0.47916801770528156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,fp8,0,0.9318453470865885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,fp8,0,0.43141865730285645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,float16,0,0.48398931821187335
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,float16,0,0.18254933754603067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,fp8,0,0.4850240151087443
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,float16,0,0.3726293245951335
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,float16,0,0.27354133129119873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,float16,0,0.25939200321833294
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,float16,0,0.19572800397872925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,fp8,0,0.27774399518966675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,float16,0,0.1662826637427012
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,fp8,0,0.2710346579551697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,float16,0,0.09479999542236328
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,fp8,0,0.08118933439254761
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,fp8,0,0.16925867398579916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,fp8,0,0.0934399962425232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,float16,0,0.10188266634941101
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,fp8,0,0.2244053284327189
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,fp8,0,0.13340800007184347
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,float16,0,0.1452906628449758
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,float16,0,0.05319466690222422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,fp8,0,0.047194664676984154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,fp8,0,0.1216373344262441
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,float16,0,0.05699199934800466
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,fp8,0,0.053690666953722634
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,float16,0,0.10716799894968669
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,fp8,0,0.07020799815654755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,float16,0,0.075013334552447
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,fp8,0,0.07067200044790904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,float16,0,0.033370666205883026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,fp8,0,0.03102933367093404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,float16,0,0.06404800216356914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,float16,0,0.0354720006386439
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,fp8,0,0.14968533317248026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,fp8,0,0.06028800209363302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,fp8,0,0.03356799980004629
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,fp8,0,0.040607998768488564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,float16,0,0.04126933217048645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,fp8,0,0.04192000130812327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,float16,0,0.04158399999141693
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,float16,0,0.021205333371957142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,fp8,0,0.033514666060606636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,float16,0,0.021349333226680756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,fp8,0,0.021488000949223835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,float16,0,0.027424000203609467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,float16,0,0.024933333198229473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,fp8,0,0.025583999852339428
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,float16,0,0.0252960001428922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,float16,0,0.14521599809328714
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,fp8,0,0.14732266465822855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,float16,0,0.019205333044131596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,float16,0,0.02329600105683009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,float16,0,0.01932799940307935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,float16,0,0.020869334538777668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,fp8,0,0.021344001094500225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,float16,0,0.021242665747801464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,fp8,0,0.02144533395767212
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,float16,0,0.46008535226186115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,fp8,0,0.42485864957173664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,float16,0,0.5235146681467692
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,fp8,0,0.01940800001223882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,fp8,0,0.5091413259506226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,fp8,0,0.02643200010061264
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,fp8,0,0.025818665822347004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,float16,0,0.06880000233650208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,float16,0,0.8758827050526937
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,float16,0,0.6616160074869791
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,fp8,0,0.7926560242970785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,fp8,0,0.9491146405537924
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,float16,0,0.8891306718190511
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,float16,0,0.24424533049265543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,fp8,0,0.9164799849192301
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,fp8,0,0.22215465704600015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,fp8,0,0.27113600571950275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,float16,0,0.2808319926261902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,float16,0,0.4310506582260132
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,float16,0,0.4152586857477824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,float16,0,0.12973866860071817
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,float16,0,0.33796266714731854
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,fp8,0,0.11883733669916789
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,fp8,0,0.4363040129343669
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,fp8,0,0.40278398990631104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,float16,0,0.15224533279736838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,fp8,0,0.1405226687590281
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,float16,0,0.23702933390935263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,fp8,0,0.24951465924580893
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,float16,0,0.2156320015589396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,float16,0,0.07281066477298737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,fp8,0,0.06464533507823944
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,fp8,0,0.23880000909169516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,fp8,0,0.2083359956741333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,float16,0,0.17700799306233725
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,float16,0,0.08120533327261607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,fp8,0,0.07670933504899342
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,float16,0,0.11855467160542806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,fp8,0,0.12006400028864543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,float16,0,0.12570666273434958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,fp8,0,0.12935466567675272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,float16,0,0.041749333341916404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,fp8,0,0.448304017384847
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,fp8,0,0.039279999832312264
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,float16,0,0.09754133224487305
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,float16,0,0.04610133171081543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,float16,0,0.062218666076660156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,float16,0,0.06427733103434245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,fp8,0,0.061834668119748436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,fp8,0,0.06260266900062561
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,float16,0,0.025818665822347004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,fp8,0,0.02536533276240031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,float16,0,0.027210667729377747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,fp8,0,0.05298133194446564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,float16,0,0.05074666440486908
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,fp8,0,0.03565333286921183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,float16,0,0.033626665671666466
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,float16,0,0.03342933456103007
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,fp8,0,0.03548266738653183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,float16,0,0.017258666455745697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,fp8,0,0.01701333373785019
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,float16,0,0.019152000546455383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,fp8,0,0.01929066702723503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,float16,0,0.025616000096003216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,fp8,0,0.03155199935038885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,fp8,0,0.02309866746266683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,fp8,0,0.02351466566324234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,float16,0,0.021205333371957142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,fp8,0,0.015461333096027374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,fp8,0,0.023541333774725597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,float16,0,0.015365333606799444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,fp8,0,0.015418666104475657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,fp8,0,0.11331733067830403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,fp8,0,0.04605866471926371
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,float16,0,0.01714666684468587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,float16,0,0.01701333373785019
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,fp8,0,0.01907733331123988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,float16,0,0.01543466622630755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,fp8,0,0.015285332997639975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,float16,0,0.017093333105246227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,fp8,0,0.01807466646035512
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,fp8,0,0.027642667293548584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,float16,0,0.015194666882356008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,float16,0,0.014981333166360855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,fp8,0,0.01522133375207583
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,float16,0,0.015103999525308609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,fp8,0,0.015279999623696009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,float16,0,0.021194666624069214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,float16,0,0.019237333287795384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,fp8,0,0.01735466718673706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,float16,0,0.2875093420346578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,fp8,0,0.26446932554244995
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,fp8,0,0.3132266600926717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,float16,0,0.3259200056393941
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,float16,0,0.48476799329121906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,fp8,0,0.4572266737620036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,float16,0,0.4551733334859212
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,float16,0,0.015285332997639975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,float16,0,0.3585226535797119
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,fp8,0,0.46393601099650067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,fp8,0,0.4238239924112956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,float16,0,0.1562026639779409
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,fp8,0,0.14199466506640115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,float16,0,0.17147199312845865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,float16,0,0.25388266642888385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,float16,0,0.19338667392730713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,float16,0,0.08674666285514832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,float16,0,0.24582399924596152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,fp8,0,0.16217066844304404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,fp8,0,0.25563732782999676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,fp8,0,0.24657599131266275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,fp8,0,0.07834666470686595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,float16,0,0.0965226689974467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,float16,0,0.13150933384895325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,fp8,0,0.14301333824793497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,fp8,0,0.08784000078837077
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,fp8,0,0.13075733184814453
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,float16,0,0.12893333037694296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,float16,0,0.11185066898663838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,fp8,0,0.04177600145339966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,fp8,0,0.1153546671072642
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,float16,0,0.046037331223487854
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,float16,0,0.049866666396458946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,fp8,0,0.04797866443792979
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,float16,0,0.06655466556549072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,fp8,0,0.06437333424886067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,fp8,0,0.2184213399887085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,float16,0,0.06863999863465627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,float16,0,0.029824001093705494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,fp8,0,0.027503999571005504
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,float16,0,0.054645334680875145
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,fp8,0,0.05613866448402405
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,fp8,0,0.02945599953333537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,float16,0,0.03670933345953623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,fp8,0,0.03773866593837738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,float16,0,0.030960001051425934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,float16,0,0.03709333389997482
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,float16,0,0.018992000569899876
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,fp8,0,0.017509333789348602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,fp8,0,0.019274666905403137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,float16,0,0.023215999205907185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,float16,0,0.02552533398071925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,float16,0,0.01942933350801468
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,fp8,0,0.023306667804718018
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,float16,0,0.02334933231274287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,fp8,0,0.023685333629449207
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,float16,0,0.017429333180189133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,fp8,0,0.013290667285521826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,float16,0,0.015253332753976187
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,fp8,0,0.021514666577180225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,fp8,0,0.06648533542950948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,fp8,0,0.015370666980743408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,fp8,0,0.016410666207472484
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,float16,0,0.013061333447694778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,float16,0,0.01293333371480306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,float16,0,0.013194666554530462
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,fp8,0,0.01646399994691213
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,float16,0,0.012938667088747025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,fp8,0,0.0378506655494372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,float16,0,0.013269333789745966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,float16,0,0.013264000415802002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,fp8,0,0.03170666595300039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,fp8,0,0.014954666296641031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,float16,0,0.013242666920026144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,float16,0,0.013034666577974955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,float16,0,0.01301866645614306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,float16,0,0.01504533365368843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,float16,0,0.24516266584396362
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,fp8,0,0.21517332394917807
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,float16,0,0.2614453236262004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,fp8,0,0.23567465941111246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,float16,0,0.3372053305308024
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,float16,0,0.012970666090647379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,fp8,0,0.3456053336461385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,fp8,0,0.33453865845998126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,float16,0,0.32841600974400836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,float16,0,0.23225067059199014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,fp8,0,0.25650666157404584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,float16,0,0.13450666268666586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,fp8,0,0.11332266529401143
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,float16,0,0.1425440013408661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,fp8,0,0.17841599384943643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,fp8,0,0.13517333070437113
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,float16,0,0.133925328652064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,float16,0,0.17911465962727866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,float16,0,0.18370133638381958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,float16,0,0.07035199801127116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,fp8,0,0.16909333070119223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,fp8,0,0.08306666711966197
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,float16,0,0.01312000056107839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,fp8,0,0.0672266681989034
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,float16,0,0.09270399808883667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,fp8,0,0.06213866670926412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,float16,0,0.07486933469772339
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,float16,0,0.0934879978497823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,fp8,0,0.08462933699289958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,float16,0,0.04080000023047129
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,fp8,0,0.035786665976047516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,float16,0,0.04204800228277842
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,fp8,0,0.06345066428184509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,float16,0,0.04798933366934458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,fp8,0,0.046021332343419395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,float16,0,0.04796266555786133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,fp8,0,0.04663466910521189
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,float16,0,0.025381334125995636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,fp8,0,0.0232640008131663
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,float16,0,0.027045334378878277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,fp8,0,0.02513066679239273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,fp8,0,0.12719466288884482
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,float16,0,0.033488000432650246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,fp8,0,0.029285334050655365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,fp8,0,0.029445332785447437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,float16,0,0.029653333127498627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,float16,0,0.021125334004561108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,float16,0,0.01738133281469345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,fp8,0,0.02359466751416524
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,float16,0,0.017312000195185345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,float16,0,0.01918399954835574
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,fp8,0,0.019039999693632126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,fp8,0,0.03772799919048945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,float16,0,0.07173333565394084
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,fp8,0,0.01905599981546402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,float16,0,0.01918399954835574
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,fp8,0,0.01321600005030632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,float16,0,0.014991999914248785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,fp8,0,0.01321600005030632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,fp8,0,0.015290666371583939
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,fp8,0,0.03791466603676478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,float16,0,0.013253333667914072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,float16,0,0.013167999684810638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,float16,0,0.013082666943470636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,fp8,0,0.014032000054915747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,fp8,0,0.012138667205969492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,float16,0,0.013264000415802002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,float16,0,0.012304000556468964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,fp8,0,0.013248000293970108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,float16,0,0.011551999797423681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,fp8,0,0.0143306665122509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,fp8,0,0.011365332951148352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,fp8,0,0.01725333308180173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,float16,0,0.01341333364446958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,fp8,0,0.011338666081428528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,float16,0,0.011328000575304031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,float16,0,0.012858666479587555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,float16,0,0.012768000364303589
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,float16,0,0.013034666577974955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,float16,0,0.033471999069054924
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,float16,0,0.23018133640289307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,fp8,0,0.19273066520690918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,fp8,0,0.2008799910545349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,float16,0,0.01328533391157786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,float16,0,0.2720266580581665
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,fp8,0,0.24413333336512247
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,float16,0,0.2810773253440857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,fp8,0,0.09943466385205586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,fp8,0,0.2533973256746928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,float16,0,0.1812266707420349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,float16,0,0.12703466415405273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,fp8,0,0.17230399449666342
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,fp8,0,0.10515200098355611
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,float16,0,0.14433599511782327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,fp8,0,0.12191999951998393
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,float16,0,0.06660800178845723
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,fp8,0,0.12424533565839131
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,fp8,0,0.08865066369374593
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,fp8,0,0.05601066847642263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,float16,0,0.09277333815892537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,float16,0,0.06773866713047028
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,fp8,0,0.05816533168156942
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,float16,0,0.2360586722691854
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,fp8,0,0.06465599934260051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,fp8,0,0.06620799998442332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,float16,0,0.038293334345022835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,float16,0,0.047007997830708824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,fp8,0,0.03309866786003113
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,fp8,0,0.04706133405367533
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,float16,0,0.03968533376852671
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,float16,0,0.12434132893880208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,float16,0,0.04251199960708618
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,fp8,0,0.038431999584039055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,float16,0,0.0417546679576238
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,fp8,0,0.03526400029659271
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,fp8,0,0.039546666045983635
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,fp8,0,0.022287999590237934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,float16,0,0.023962666591008503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,float16,0,0.024703999360402424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,fp8,0,0.029359998802344005
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,fp8,0,0.022954667607943218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,float16,0,0.02731200059254964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,float16,0,0.025568000972270966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,fp8,0,0.023354666928450268
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,float16,0,0.02534399926662445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,float16,0,0.14566933115323386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,fp8,0,0.025445332129796345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,float16,0,0.01810666670401891
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,float16,0,0.016901332885026932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,fp8,0,0.01922133316596349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,fp8,0,0.015376000354687372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,float16,0,0.07392533123493195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,float16,0,0.017125333348910015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,float16,0,0.07424533367156982
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,fp8,0,0.016058667252461117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,float16,0,0.01756799966096878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,float16,0,0.013162666310866674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,float16,0,0.013376000026861826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,fp8,0,0.014949332922697067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,float16,0,0.013237333546082178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,fp8,0,0.013178666432698568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,float16,0,0.013221333424250284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,float16,0,0.012026666353146235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,fp8,0,0.011445333560307821
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,float16,0,0.011183999478816986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,fp8,0,0.013237333546082178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,float16,0,0.013503999759753546
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,fp8,0,0.012666666259368261
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,float16,0,0.011445333560307821
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,float16,0,0.017077332983414333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,float16,0,0.011306667079528173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,float16,0,0.012917333592971167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,float16,0,0.011205332974592844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,fp8,0,0.011359999577204386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,float16,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,fp8,0,0.015317333241303762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,fp8,0,0.012234666695197424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,float16,0,0.22123199701309204
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,fp8,0,0.012543999900420507
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,fp8,0,0.17492800951004028
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,float16,0,0.2239840030670166
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,fp8,0,0.17998933792114258
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,float16,0,0.2458453377087911
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,float16,0,0.011183999478816986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,fp8,0,0.19985065857569376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,float16,0,0.1477013329664866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,float16,0,0.24650132656097412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,fp8,0,0.20753065745035806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,float16,0,0.11738666892051697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,fp8,0,0.0942133367061615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,float16,0,0.1174720029036204
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,fp8,0,0.13104533155759177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,float16,0,0.12506666779518127
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,fp8,0,0.09635733564694722
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,float16,0,0.12757333119710287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,fp8,0,0.10542399684588115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,float16,0,0.06464000046253204
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,fp8,0,0.05213866631189982
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,float16,0,0.08079466720422109
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,float16,0,0.06578133503595988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,fp8,0,0.05416533350944519
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,float16,0,0.06839466591676076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,fp8,0,0.05801066756248474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,float16,0,0.06854400038719177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,fp8,0,0.058117335041364036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,float16,0,0.04165333261092504
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,float16,0,0.037578667203585304
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,fp8,0,0.039647998909155525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,fp8,0,0.031685332457224526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,fp8,0,0.03134933362404505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,float16,0,0.038912000755469
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,fp8,0,0.03385066737731298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,float16,0,0.039162665605545044
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,fp8,0,0.03383466601371765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,fp8,0,0.02144533395767212
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,float16,0,0.025263999899228413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,fp8,0,0.025279998779296875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,fp8,0,0.10542399684588115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,float16,0,0.023423999547958374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,fp8,0,0.02147199958562851
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,float16,0,0.023621333142121632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,fp8,0,0.02162666618824005
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,float16,0,0.02325333406527837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,fp8,0,0.02184533327817917
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,float16,0,0.011776000261306763
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,float16,0,0.016997333616018295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,fp8,0,0.015376000354687372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,float16,0,0.017136000096797943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,fp8,0,0.015386667102575302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,float16,0,0.017184000462293625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,fp8,0,0.01534933348496755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,float16,0,0.03772266705830892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,fp8,0,0.015237333873907724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,float16,0,0.01328533391157786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,fp8,0,0.01328533391157786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,float16,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,float16,0,0.023599999646345775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,float16,0,0.01301866645614306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,float16,0,0.011136000355084738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,fp8,0,0.06664533416430156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,fp8,0,0.01119999960064888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,float16,0,0.013237333546082178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,float16,0,0.01312000056107839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,fp8,0,0.011567999919255575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,float16,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,float16,0,0.01301866645614306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,fp8,0,0.01727466657757759
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,float16,0,0.017050666113694508
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,float16,0,0.011343999455372492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,float16,0,0.011146667102972666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,float16,0,0.01202133297920227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,float16,0,0.011312000453472137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,0,0.015354666858911514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,0,0.017210666090250015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,0,0.01922133316596349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,0,0.018624000251293182
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,float16,0,0.017269333203633625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,0,0.029765332738558452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,float16,0,0.01320533330241839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,fp8,0,0.04362666606903076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,float16,0,0.05778133372465769
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,0,0.014597332725922266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,0,0.013359999905029932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,0,0.01904533306757609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,0,0.011034666250149408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,float16,0,0.02940800040960312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,0,0.011338666081428528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,0,0.01523200049996376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,float16,0,0.019306667149066925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,fp8,0,0.01907733331123988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,0,0.011045332998037338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,0,0.009941333283980688
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,0,0.025754667818546295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,0,0.010949333508809408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,0,0.011168000598748526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,0,0.01166933278242747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,0,0.011152000476916632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,0,0.009216000015536943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,fp8,0,0.015018666783968607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,0,0.009269333134094873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,0,0.011173332730929056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,0,0.011034666250149408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,fp8,0,0.01309866706530253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,float16,0,0.013354666531085968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,0,0.00919999989370505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,0,0.011258666714032492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,0,0.00916800027092298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,0,0.009541333342591921
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,0,0.00979200005531311
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,0,0.010826667149861654
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,0,0.009898666913310686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,0,0.009269333134094873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,fp8,0,0.011882666498422623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,float16,0,0.01097600037852923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,0,0.008986666798591614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,0,0.009189333145817121
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,0,0.009850666547815004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,0,0.009029333169261614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,fp8,0,0.011312000453472137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,float16,0,0.011055999745925268
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,0,0.009178666397929192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,0,0.009589333087205887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,0,0.009152000149091085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,0,0.009290666629870733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,float16,0,0.011050666371981302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,0,0.00891733355820179
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,0,0.00919999989370505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,0,0.008842666943868002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,0,0.009173333023985228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,fp8,0,0.027317332724730175
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,0,0.009226666763424873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,float16,0,0.015423999478419622
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,fp8,0,8.60200564066569
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,fp8,0,8.661850611368815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,float16,0,11.571732838948568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,float16,0,11.756202697753906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,fp8,0,9.492277145385742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,fp8,0,5.286848068237305
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,float16,0,12.318655649820963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,float16,0,6.228058497111003
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,float16,0,5.888570785522461
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,fp8,0,4.636426607767741
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,float16,0,11.975723266601562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,fp8,0,9.196266810099283
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,float16,0,5.558298746744792
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,fp8,0,4.337610562642415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,float16,0,5.6877492268880205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,fp8,0,4.955487887064616
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,float16,0,3.3917067845662436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,fp8,0,4.803002675374349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,fp8,0,2.72270933787028
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,float16,0,5.8272959391276045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,fp8,0,2.192357381184896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,float16,0,3.171189308166504
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,fp8,0,2.2398667335510254
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,float16,0,2.785909334818522
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,fp8,0,2.4330933888753257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,float16,0,2.877589225769043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,float16,0,2.8300692240397134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,float16,0,1.5718612670898438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,fp8,0,2.8316799799601235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,float16,0,1.400261402130127
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,fp8,0,1.4554826418558757
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,float16,0,1.4152480761210124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,fp8,0,1.2975839773813884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,float16,0,1.5025067329406738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,fp8,0,1.291962703069051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,float16,0,1.4901119867960613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,fp8,0,1.375775973002116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,fp8,0,4.943685213724772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,float16,0,6.2949174245198565
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,fp8,0,5.147125244140625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,fp8,0,1.6860747337341309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,float16,0,6.403439839680989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,fp8,0,5.587584177652995
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,float16,0,6.819520314534505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,float16,0,6.673967997233073
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,float16,0,3.06058661142985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,fp8,0,3.3135147094726562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,float16,0,3.150773366292318
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,fp8,0,3.2880051930745444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,float16,0,4.020394643147786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,fp8,0,5.497376124064128
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,fp8,0,3.212186813354492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,float16,0,3.622629483540853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,float16,0,1.5684906641642253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,fp8,0,1.371455987294515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,fp8,0,3.366069475809733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,float16,0,1.8361600240071614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,fp8,0,1.9289813041687012
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,fp8,0,2.9517173767089844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,float16,0,3.3846667607625327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,float16,0,1.663376013437907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,fp8,0,1.4964693387349446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,float16,0,1.686954657236735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,fp8,0,1.5304373105367024
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,float16,0,0.8366773128509521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,float16,0,0.9737706979115804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,fp8,0,0.7399413585662842
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,fp8,0,1.5549386342366536
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,float16,0,1.679968039194743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,fp8,0,0.7364959716796875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,fp8,0,0.971407969792684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,float16,0,0.9076639811197916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,fp8,0,0.7950133482615153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,fp8,0,0.8048373063405355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,float16,0,0.9000746409098307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,fp8,0,3.5016425450642905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,float16,0,4.212037404378255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,fp8,0,3.6345227559407554
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,float16,0,4.360101381937663
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,fp8,0,3.9624001185099282
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,float16,0,4.795653343200684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,float16,0,4.741365432739258
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,float16,0,0.8272373676300049
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,float16,0,2.158970673878988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,float16,0,2.6702025731404624
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,fp8,0,2.0045013427734375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,fp8,0,2.7216161092122397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,fp8,0,4.394869486490886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,fp8,0,1.8513654073079426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,float16,0,2.4054346084594727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,fp8,0,2.0475573539733887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,float16,0,2.4039360682169595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,fp8,0,2.167445341746012
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,fp8,0,1.267797311147054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,float16,0,1.12664000193278
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,fp8,0,0.9400746822357178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,float16,0,1.1395093599955242
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,fp8,0,0.971338669459025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,float16,0,1.2179839611053467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,fp8,0,1.126421372095744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,fp8,0,1.084549347559611
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,float16,0,1.231765349706014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,float16,0,0.587007999420166
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,fp8,0,0.5225173234939575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,float16,0,0.7500106493631998
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,fp8,0,0.6790773073832194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,float16,0,0.604634682337443
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,fp8,0,0.5282293160756429
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,fp8,0,0.596288005510966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,float16,0,0.6443626483281454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,fp8,0,0.5937813520431519
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,float16,0,0.6592853466669718
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,float16,0,1.349503993988037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,fp8,0,4.589141209920247
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,float16,0,5.580912272135417
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,fp8,0,4.759594599405925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,float16,0,5.823525110880534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,fp8,0,5.485445022583008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,float16,0,6.501167933146159
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,float16,0,2.4145973523457847
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,fp8,0,2.332080046335856
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,float16,0,3.070624033610026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,float16,0,6.427157084147136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,fp8,0,2.4148213068644204
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,float16,0,3.7150561014811196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,fp8,0,5.383893330891927
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,float16,0,3.210341453552246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,fp8,0,3.1981334686279297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,float16,0,3.1666879653930664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,fp8,0,2.83625062306722
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,fp8,0,1.71833070119222
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,float16,0,1.8508480389912922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,float16,0,1.4419840176900227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,fp8,0,1.2242826620737712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,fp8,0,1.2520853678385417
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,float16,0,1.5688640276590984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,fp8,0,1.422309398651123
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,float16,0,1.6216746966044109
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,float16,0,1.6053333282470703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,fp8,0,0.6384160121281942
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,fp8,0,1.4306294123331706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,float16,0,0.745306650797526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,float16,0,0.9394026597340902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,float16,0,0.7714506785074869
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,fp8,0,0.6604959964752197
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,float16,0,0.8452640374501547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,fp8,0,0.7727359930674235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,float16,0,0.8382240136464437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,float16,0,0.39902400970458984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,fp8,0,0.3551786740620931
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,float16,0,0.5030986467997233
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,float16,0,0.41074132919311523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,fp8,0,0.48906131585439044
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,fp8,0,0.3651626507441203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,float16,0,0.44970667362213135
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,fp8,0,0.4014773368835449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,float16,0,0.4460373322168986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,fp8,0,0.42484267552693683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,fp8,0,3.3713385264078775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,float16,0,3.014239947001139
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,fp8,0,0.9421119689941406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,fp8,0,2.738778750101725
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,float16,0,3.2803627649943032
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,float16,0,3.3706401189168296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,fp8,0,2.8557761510213218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,fp8,0,0.7997173468271891
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,fp8,0,1.4002505938212078
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,float16,0,1.6643625895182292
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,float16,0,3.852458635965983
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,fp8,0,3.451343854268392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,float16,0,2.4571359952290854
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,fp8,0,3.3532158533732095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,fp8,0,2.2770773569742837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,float16,0,1.696730613708496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,fp8,0,1.5556799570719402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,fp8,0,1.7628374099731445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,float16,0,2.0828000704447427
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,float16,0,1.97927459081014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,fp8,0,1.7887892723083496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,float16,0,0.8539893627166748
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,float16,0,1.1403253078460693
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,fp8,0,0.8575413227081299
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,fp8,0,1.191146691640218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,fp8,0,0.7689706484476725
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,float16,0,0.8806560039520264
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,float16,0,0.9987626870473226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,fp8,0,0.8981280326843262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,float16,0,0.4471679925918579
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,fp8,0,0.9134453137715658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,fp8,0,0.3969120184580485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,float16,0,0.4609973430633545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,float16,0,0.5945333242416382
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,fp8,0,0.588645339012146
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,fp8,0,0.420687993367513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,float16,0,3.782287915547689
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,float16,0,0.5183093150456747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,float16,0,0.5206506649653116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,fp8,0,0.4891573190689087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,float16,0,0.24812267223993936
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,float16,0,0.3259413242340088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,fp8,0,0.2302079995473226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,fp8,0,0.3272426724433899
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,float16,0,0.25306665897369385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,fp8,0,0.2350026567776998
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,float16,0,0.28385066986083984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,float16,0,0.2863840063412984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,fp8,0,0.26314665873845416
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,fp8,0,0.26946133375167847
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,float16,0,0.9865333239237467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,float16,0,3.09665584564209
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,fp8,0,2.6421119372049966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,float16,0,3.2206719716389975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,float16,0,3.9718027114868164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,fp8,0,0.4923786719640096
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,float16,0,1.5717919667561848
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,fp8,0,3.32423464457194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,float16,0,2.397088050842285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,fp8,0,3.642762819925944
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,float16,0,3.796367963155111
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,fp8,0,2.4483253161112466
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,fp8,0,1.3506986300150554
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,fp8,0,1.4346027374267578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,float16,0,1.6438080469767253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,float16,0,1.9671680132548015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,float16,0,0.8021706740061442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,fp8,0,2.833749453226725
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,fp8,0,1.8759946823120117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,float16,0,2.0127573013305664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,fp8,0,1.8047787348429363
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,fp8,0,1.212106704711914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,float16,0,1.188362677892049
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,float16,0,0.8393653233846029
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,float16,0,0.9896586736043295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,fp8,0,0.9401386578877767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,float16,0,1.0178933143615723
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,float16,0,0.4135040044784546
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,fp8,0,0.9247039953867594
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,float16,0,0.6117279926935831
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,fp8,0,0.6300373474756876
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,fp8,0,0.38734932740529376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,fp8,0,0.39924267927805585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,float16,0,0.4328373273213704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,float16,0,0.5143733421961466
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,fp8,0,0.5035839875539144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,float16,0,0.22405866781870523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,fp8,0,0.49830400943756104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,float16,0,0.2359679937362671
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,float16,0,0.32524265845616657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,fp8,0,0.33936532338460285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,float16,0,0.2739253242810567
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,fp8,0,0.2539626757303874
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,float16,0,0.2731093366940816
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,fp8,0,0.2757280071576436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,float16,0,0.1876373291015625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,float16,0,0.13487999637921652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,fp8,0,0.701141357421875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,fp8,0,0.7497759660085043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,fp8,0,0.18956265846888223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,float16,0,0.13970133662223816
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,fp8,0,0.1262986660003662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,fp8,0,0.13242133458455405
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,float16,0,0.15185067057609558
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,float16,0,0.153738667567571
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,fp8,0,0.15018666783968607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,float16,0,1.8591893513997395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,float16,0,0.5062133471171061
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,fp8,0,1.6277440388997395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,float16,0,1.9492799441019695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,fp8,0,1.7557226816813152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,fp8,0,0.22016000747680664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,fp8,0,2.184602737426758
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,float16,0,2.4574400583902993
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,float16,0,1.5332372983296711
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,fp8,0,1.6039946873982747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,float16,0,2.407909393310547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,fp8,0,2.234922726949056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,fp8,0,0.14898666739463806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,float16,0,0.9520906607309977
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,fp8,0,0.8335039615631104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,float16,0,0.9923413594563802
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,fp8,0,0.8957066535949707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,float16,0,1.2234933376312256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,fp8,0,1.2003413041432698
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,fp8,0,1.1603999932607014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,float16,0,0.48524800936381024
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,fp8,0,0.4416053295135498
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,fp8,0,0.20960533618927002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,float16,0,0.7742719650268555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,fp8,0,0.8204320271809896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,float16,0,0.5107680161794027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,fp8,0,0.4714346726735433
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,float16,0,0.6332266728083292
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,fp8,0,0.6152480045954386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,float16,0,0.6128960053126017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,fp8,0,0.5958293279012045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,float16,0,0.2597653269767761
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,float16,0,0.4057493209838867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,fp8,0,0.23983999093373617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,fp8,0,0.43326934178670246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,float16,0,0.27476799488067627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,fp8,0,0.26148800055185956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,float16,0,0.334112008412679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,float16,0,0.14752533038457236
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,fp8,0,0.3291146755218506
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,fp8,0,0.33236799637476605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,float16,0,0.3347413142522176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,fp8,0,0.13730133573214212
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,float16,0,0.15069866180419922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,fp8,0,0.14632532993952432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,fp8,0,0.2352959911028544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,fp8,0,0.17059199015299478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,fp8,0,0.1757919987042745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,float16,0,0.18453866243362427
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,float16,0,0.09451199571291606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,float16,0,0.13272533814112344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,fp8,0,0.08795733253161113
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,fp8,0,0.12593066692352295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,float16,0,1.2194453080495198
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,float16,0,0.09596266349156697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,fp8,0,0.09212799866994222
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,float16,0,0.10460799932479858
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,fp8,0,0.10449600219726562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,float16,0,0.10547199845314026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,fp8,0,0.10566932956377666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,float16,0,1.8553706804911296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,fp8,0,1.6610453923543294
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,fp8,0,1.8394986788431804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,float16,0,1.982319990793864
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,float16,0,2.6498133341471353
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,fp8,0,2.347482681274414
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,float16,0,0.2225280006726583
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,float16,0,0.1821546753247579
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,float16,0,0.952303965886434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,fp8,0,0.8437226613362631
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,float16,0,1.0066986878712971
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,fp8,0,0.9334879716237386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,fp8,0,2.4588534037272134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,float16,0,2.6506080627441406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,float16,0,1.7349653244018555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,fp8,0,1.2689440250396729
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,float16,0,1.334671974182129
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,fp8,0,0.9501706759134928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,fp8,0,1.3620212872823079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,fp8,0,0.4384586811065674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,float16,0,0.48579200108846027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,float16,0,0.5179946819941202
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,fp8,0,0.4848693211873372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,float16,0,0.678266684214274
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,fp8,0,0.6366560061772665
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,float16,0,0.6535253524780273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,fp8,0,0.645359992980957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,float16,0,0.2537226676940918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,fp8,0,0.23849066098531088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,fp8,0,0.25834665695826214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,float16,0,0.4503786563873291
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,float16,0,0.3627893527348836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,fp8,0,0.3700480063756307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,float16,0,0.3428746859232585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,float16,0,1.3594613075256348
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,fp8,0,0.3440213203430176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,fp8,0,0.26320000489552814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,float16,0,0.24117332696914673
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,fp8,0,0.13344533244768778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,float16,0,0.8710827032725016
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,float16,0,0.154858668645223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,fp8,0,0.14046933253606161
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,float16,0,0.18976000944773355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,fp8,0,0.19087467590967813
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,float16,0,0.18872533241907755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,fp8,0,0.18970666329065958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,float16,0,0.08495466907819112
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,fp8,0,0.14256532986958823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,fp8,0,0.08032000064849854
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,float16,0,0.14074132839838663
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,float16,0,0.0885706643263499
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,fp8,0,0.08474666873613994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,float16,0,0.10127466917037964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,fp8,0,0.10293866197268169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,float16,0,0.10295466581980388
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,fp8,0,0.4922933181126912
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,fp8,0,0.10473600029945374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,fp8,0,0.05409066875775655
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,float16,0,0.058149332801500954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,float16,0,0.07397866745789845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,fp8,0,0.07852800190448761
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,float16,0,0.05933333436648051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,fp8,0,0.05622399846712748
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,float16,0,0.06572799881299336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,fp8,0,0.06490133206049602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,fp8,0,1.867685317993164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,fp8,0,0.06523199876149495
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,float16,0,0.06573866804440816
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,float16,0,0.14261866609255472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,float16,0,1.1713866392771404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,fp8,0,1.0654719670613606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,fp8,0,1.1971200307210286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,float16,0,1.8050187428792317
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,fp8,0,1.6548479398091633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,float16,0,0.5995093186696371
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,float16,0,0.27063467105229694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,float16,0,1.7001172701517742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,fp8,0,1.6851946512858074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,fp8,0,1.3087093035380046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,fp8,0,0.5517760117848715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,float16,0,0.6411679983139038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,float16,0,1.1900800069173176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,fp8,0,0.6133493185043335
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,float16,0,0.8802773157755533
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,fp8,0,0.9452160199483236
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,float16,0,0.9110986391703287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,float16,0,0.31166932980219525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,float16,0,0.6036959886550903
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,fp8,0,0.8669280211130778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,fp8,0,0.29375465710957843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,fp8,0,0.6703360080718994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,float16,0,0.34143467744191486
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,fp8,0,0.32687467336654663
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,float16,0,0.4549493392308553
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,float16,0,1.2569866975148518
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,fp8,0,0.45711998144785565
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,float16,0,0.16990933815638223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,fp8,0,0.45125865936279297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,float16,0,0.24589866399765015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,float16,0,0.3141866723696391
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,fp8,0,0.254746675491333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,fp8,0,0.3482026656468709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,float16,0,0.18409599860509238
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,fp8,0,0.24398932854334512
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,float16,0,0.09665066997210185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,float16,0,0.24073066314061484
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,float16,0,0.17221333583196005
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,float16,0,0.10174933075904846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,fp8,0,0.09210667014122009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,fp8,0,0.10019733508427937
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,fp8,0,0.12532266974449158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,float16,0,0.13410133123397827
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,float16,0,0.10355732838312785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,fp8,0,0.09704533219337463
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,float16,0,0.06099733213583628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,fp8,0,0.05820266902446747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,float16,0,0.43862934907277423
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,float16,0,0.06436799963315327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,fp8,0,0.1602186659971873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,fp8,0,0.0621066689491272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,float16,0,0.07195733487606049
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,fp8,0,0.07381866872310638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,fp8,0,0.17983466386795044
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,fp8,0,0.07477866609891255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,fp8,0,0.03573333223660787
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,fp8,0,0.05593599875768026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,float16,0,0.03812266637881597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,fp8,0,0.037578667203585304
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,fp8,0,0.18803733587265015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,float16,0,0.0436106671889623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,fp8,0,0.04384533564249674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,float16,0,0.04378666480382284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,float16,0,0.13101866841316223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,fp8,0,0.13168000181516012
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,float16,0,1.2306400140126545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,fp8,0,1.1626826922098796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,float16,0,1.3966933886210124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,fp8,0,1.344058672587077
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,float16,0,0.07317333420117696
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,float16,0,0.04808533191680908
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,float16,0,2.1100266774495444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,fp8,0,0.043968002001444496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,float16,0,1.412943998972575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,fp8,0,2.16756264368693
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,fp8,0,1.6152532895406086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,float16,0,2.0786986351013184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,fp8,0,1.9619626998901367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,fp8,0,0.5942293405532837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,float16,0,0.6275786558787028
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,float16,0,0.6976479689280192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,fp8,0,0.6791840394337972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,float16,0,1.0631306966145833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,float16,0,0.0379573330283165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,fp8,0,1.1046559810638428
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,float16,0,0.3290453354517619
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,fp8,0,1.0244053204854329
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,fp8,0,0.3095360000928243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,fp8,0,0.8144000371297201
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,fp8,0,0.36132800579071045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,float16,0,0.36190398534138996
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,float16,0,0.5142346620559692
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,fp8,0,0.5019413232803345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,float16,0,0.5009973446528116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,fp8,0,0.5166879892349243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,fp8,0,0.1717066764831543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,float16,0,0.3720266819000244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,float16,0,0.18023467063903809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,float16,0,0.1958400011062622
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,fp8,0,0.42078399658203125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,fp8,0,0.1933280030886332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,float16,0,0.28387733300526935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,fp8,0,0.27156267563501996
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,float16,0,0.2656159996986389
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,float16,0,0.10017599662144978
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,fp8,0,0.09452266494433086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,float16,0,0.11083199580510457
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,float16,0,0.9783039887746176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,fp8,0,0.22383999824523926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,float16,0,0.7156000137329102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,fp8,0,0.10244799653689067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,float16,0,0.15018666783968607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,float16,0,0.14593066771825156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,fp8,0,0.16049066185951233
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,float16,0,0.060677334666252136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,float16,0,0.10975466171900432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,fp8,0,0.05677866439024607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,fp8,0,0.12111999591191609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,float16,0,0.06409066418806712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,fp8,0,0.06278400123119354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,float16,0,0.07518933216730754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,fp8,0,0.08009600142637889
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,float16,0,0.07669866581757863
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,fp8,0,0.08062933385372162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,float16,0,0.0421066681543986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,float16,0,0.043247997760772705
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,fp8,0,0.042026668787002563
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,float16,0,0.05634133517742157
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,fp8,0,0.06436799963315327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,float16,0,0.04952533543109894
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,fp8,0,0.2847306728363037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,fp8,0,0.050010666251182556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,float16,0,0.048938666780789696
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,fp8,0,0.05150933563709259
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,float16,0,0.027903998891512554
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,fp8,0,0.0273333340883255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,float16,0,0.1962560017903646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,float16,0,0.029605334003766377
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,float16,0,0.03588266670703888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,float16,0,0.031445334355036415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,fp8,0,0.031871999303499855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,fp8,0,0.13503467043240866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,float16,0,0.03202133377393087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,fp8,0,0.03342933456103007
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,float16,0,0.9591573079427084
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,fp8,0,0.9086399873097738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,fp8,0,0.03957333415746689
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,float16,0,1.0968213081359863
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,fp8,0,1.0838080247243245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,float16,0,1.8397067387898762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,fp8,0,0.029322666426499683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,fp8,0,0.4671093225479126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,fp8,0,1.9004054069519043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,float16,0,1.2640213171641033
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,float16,0,0.4992693265279134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,fp8,0,1.4703307151794434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,float16,0,1.7203946113586426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,float16,0,0.5698186556498209
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,fp8,0,1.7063306172688801
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,fp8,0,0.5589866638183594
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,fp8,0,0.04012266546487808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,float16,0,0.9291573365529379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,float16,0,0.25917333364486694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,fp8,0,0.9711573123931885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,float16,0,0.9294880231221517
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,fp8,0,0.2444053292274475
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,float16,0,0.6444960037867228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,float16,0,0.2926186720530192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,fp8,0,0.2889066735903422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,8,128,1,float16,fp8,0,0.4666506846745809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,8,128,1,float16,float16,0,0.43858134746551514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,fp8,0,0.38522664705912274
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,float16,0,0.3333706855773926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,float16,0,0.47060267130533856
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,fp8,0,0.43697067101796466
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,float16,0,0.16034666697184244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,fp8,0,0.15660267074902853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,fp8,0,0.9899679819742838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,float16,0,0.24796799818674722
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,fp8,0,0.23875200748443604
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,float16,0,0.14290133118629456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,fp8,0,0.1346560021241506
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,float16,0,0.23148800929387411
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,fp8,0,0.24038932720820108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,fp8,0,0.20466132958730063
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,fp8,0,0.07433600227038066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,float16,0,0.07912533481915791
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,float16,0,0.0899786651134491
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,float16,0,0.17725332578023276
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,float16,0,0.12772267063458762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,fp8,0,0.12778666615486145
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,fp8,0,0.08295466502507527
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,fp8,0,0.14061866203943887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,float16,0,0.13410133123397827
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,fp8,0,0.04429866870244344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,float16,0,0.10413333773612976
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,float16,0,0.05197866757710775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,fp8,0,0.05035200218359629
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,float16,0,0.06321600079536438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,fp8,0,0.1092746655146281
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,fp8,0,0.06726400057474773
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,fp8,0,0.06817066669464111
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,float16,0,0.06448000172773997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,float16,0,0.03235200047492981
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,fp8,0,0.031717332700888314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,fp8,0,0.03377600014209747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,fp8,0,0.05393599967161814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,float16,0,0.04598933458328247
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,float16,0,0.03958400090535482
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,fp8,0,0.041696002086003624
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,fp8,0,0.04194133480389913
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,float16,0,0.03990933299064636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,float16,0,0.021509334444999695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,fp8,0,0.7473013401031494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,fp8,0,0.02130666623512904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,float16,0,0.029765332738558452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,fp8,0,0.0340693344672521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,float16,0,0.023210667073726654
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,float16,0,0.025642665723959606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,fp8,0,0.023290666441122692
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,float16,0,0.025514667232831318
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,float16,0,0.021242665747801464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,float16,0,0.025407999753952026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,fp8,0,0.026000000536441803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,float16,0,0.04679466784000397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,fp8,0,0.01937066639463107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,float16,0,0.02125866711139679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,float16,0,0.022613334159056347
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,fp8,0,0.021536000072956085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,float16,0,0.023205332458019257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,fp8,0,0.023445333043734234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,float16,0,0.03379199902216593
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,float16,0,0.4079893430074056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,fp8,0,0.398906668027242
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,float16,0,0.48395733038584393
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,fp8,0,0.027301333844661713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,fp8,0,0.48020267486572266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,fp8,0,0.020213333268960316
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,float16,0,0.7898986339569092
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,fp8,0,0.8525760173797607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,float16,0,0.21328532695770264
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,float16,0,0.5937973260879517
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,fp8,0,0.20715200901031494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,fp8,0,0.8030347029368082
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,fp8,0,0.706666628519694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,float16,0,0.2566346724828084
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,fp8,0,0.25514666239420575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,float16,0,0.4012960195541382
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,fp8,0,0.4015786647796631
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,fp8,0,0.027274665733178455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,fp8,0,0.4068106810251872
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,float16,0,0.3078400095303853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,float16,0,0.11946666240692139
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,fp8,0,0.36475733915964764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,fp8,0,0.1135040024916331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,float16,0,0.136053333679835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,float16,0,0.3984373410542806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,float16,0,0.22436267137527466
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,fp8,0,0.1362986663977305
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,float16,0,0.20388267437616983
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,fp8,0,0.21723200877507529
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,float16,0,0.16681599617004395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,float16,0,0.06820266445477803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,fp8,0,0.06444266438484192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,float16,0,0.8399999936421713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,float16,0,0.08080533146858215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,fp8,0,0.07261866827805837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,fp8,0,0.19328532616297403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,float16,0,0.11750933527946472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,float16,0,0.12117333213488261
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,float16,0,0.039290666580200195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,fp8,0,0.03782399992148081
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,float16,0,0.08876267075538635
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,fp8,0,0.1016373336315155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,float16,0,0.04377600053946177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,fp8,0,0.043968002001444496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,fp8,0,0.13051733374595642
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,float16,0,0.05612266560395559
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,fp8,0,0.0602400004863739
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,fp8,0,0.0618399977684021
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,float16,0,0.058261334896087646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,fp8,0,0.02733866622050603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,float16,0,0.02758399893840154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,float16,0,0.029626667499542236
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,float16,0,0.04330666859944662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,fp8,0,0.030106666187445324
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,fp8,0,0.05022400120894114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,float16,0,0.035631999373435974
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,fp8,0,0.22177066405614218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,fp8,0,0.0378506655494372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,float16,0,0.035760000348091125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,fp8,0,0.03923200070858002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,float16,0,0.019007999449968338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,float16,0,0.02046400060256322
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,fp8,0,0.0317546675602595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,float16,0,0.02313599983851115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,fp8,0,0.023573334018389385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,fp8,0,0.025274666647116344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,fp8,0,0.11129066348075867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,float16,0,0.021477334201335907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,float16,0,0.017093333105246227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,fp8,0,0.023599999646345775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,float16,0,0.017093333105246227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,fp8,0,0.017344000438849132
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,fp8,0,0.017082666357358296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,float16,0,0.019146667172511418
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,fp8,0,0.019365333020687103
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,float16,0,0.019088000059127808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,fp8,0,0.019317333896954853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,float16,0,0.017077332983414333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,fp8,0,0.01941866676012675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,float16,0,0.0170666662355264
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,fp8,0,0.017349333812793095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,float16,0,0.01728533332546552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,float16,0,0.01720000058412552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,fp8,0,0.017386666188637417
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,fp8,0,0.019173332800467808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,float16,0,0.025605333348115284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,float16,0,0.023285334308942158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,float16,0,0.2476053237915039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,float16,0,0.29047467311223346
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,float16,0,0.019482667247454327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,fp8,0,0.2850186626116435
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,fp8,0,0.23823465903600058
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,float16,0,0.4337013165156047
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,fp8,0,0.4261386791865031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,float16,0,0.43775467077891034
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,fp8,0,0.01934933289885521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,float16,0,0.1346399982770284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,fp8,0,0.4469173351923625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,fp8,0,0.3768373330434163
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,fp8,0,0.12628266215324402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,float16,0,0.15732266505559286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,fp8,0,0.14878400166829428
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,float16,0,0.32374932368596393
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,float16,0,0.23875733216603598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,float16,0,0.16936532656351724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,fp8,0,0.19809067249298096
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,float16,0,0.23653332392374674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,float16,0,0.07634666562080383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,float16,0,0.08499200145403545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,fp8,0,0.25433067480723065
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,float16,0,0.12520533800125122
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,fp8,0,0.13503999511400858
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,fp8,0,0.11270933349927266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,float16,0,0.12415466705958049
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,fp8,0,0.07785066465536754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,fp8,0,0.06881600121657054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,fp8,0,0.1051093339920044
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,float16,0,0.042805333932240806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,fp8,0,0.04154133299986521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,fp8,0,0.04615999758243561
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,float16,0,0.09230933586756389
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,float16,0,0.058864002426465355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,fp8,0,0.2555466691652934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,fp8,0,0.06346133351325989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,fp8,0,0.06419200201829274
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,float16,0,0.06233599781990051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,float16,0,0.02752533306678136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,float16,0,0.02762666592995326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,float16,0,0.042394667863845825
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,fp8,0,0.027658666173617046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,fp8,0,0.03605333218971888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,float16,0,0.033615998923778534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,fp8,0,0.05002133548259735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,float16,0,0.03375466664632162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,fp8,0,0.037615999579429626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,float16,0,0.021146667500336964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,fp8,0,0.01932266727089882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,float16,0,0.02733866622050603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,float16,0,0.023168000082174938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,fp8,0,0.03370666752258936
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,fp8,0,0.02125866711139679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,float16,0,0.02348266790310542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,fp8,0,0.025146665672461193
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,float16,0,0.01370666672786077
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,fp8,0,0.01357866699496905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,fp8,0,0.02123733361562093
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,float16,0,0.013477332890033722
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,fp8,0,0.014954666296641031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,float16,0,0.017258666455745697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,float16,0,0.015402667224407196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,fp8,0,0.01526933287580808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,float16,0,0.016000000139077503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,float16,0,0.04637866715590159
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,float16,0,0.013082666943470636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,float16,0,0.013359999905029932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,fp8,0,0.02589866767326991
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,fp8,0,0.013978666315476099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,float16,0,0.01941866676012675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,fp8,0,0.0252960001428922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,float16,0,0.015082667271296183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,fp8,0,0.013280000537633896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,float16,0,0.013359999905029932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,fp8,0,0.013445333888133367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,float16,0,0.012960000584522883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,fp8,0,0.013242666920026144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,float16,0,0.01505600040157636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,fp8,0,0.01693333312869072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,fp8,0,0.17968000968297324
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,float16,0,0.1941493352254232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,float16,0,0.2135039965311686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,float16,0,0.29015467564264935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,float16,0,0.20191999276479086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,fp8,0,0.2831839919090271
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,fp8,0,0.22393065690994263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,fp8,0,0.3083146611849467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,float16,0,0.2797546585400899
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,fp8,0,0.0934986670811971
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,fp8,0,0.10455466310183208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,float16,0,0.15366400281588236
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,float16,0,0.1572213371594747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,float16,0,0.1163200040658315
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,fp8,0,0.16164799531300864
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,fp8,0,0.2057759960492452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,float16,0,0.05765866736570994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,fp8,0,0.05412266651789347
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,float16,0,0.11034666498502095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,float16,0,0.062133332093556724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,fp8,0,0.11700800061225891
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,fp8,0,0.05983999868233999
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,float16,0,0.07374399900436401
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,fp8,0,0.07508266468842824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,float16,0,0.07960000137488048
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,fp8,0,0.07669333120187123
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,float16,0,0.03555200000603994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,fp8,0,0.03357866654793421
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,float16,0,0.10850666960080464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,fp8,0,0.05606933434804281
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,float16,0,0.036117332677046456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,float16,0,0.042863999803860985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,fp8,0,0.03575466573238373
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,fp8,0,0.04397333165009817
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,float16,0,0.043290664752324425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,float16,0,0.02142400046189626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,fp8,0,0.0440533310174942
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,fp8,0,0.021365332106749218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,fp8,0,0.03365333378314972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,float16,0,0.029296000798543293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,float16,0,0.02345066765944163
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,float16,0,0.025589334468046825
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,fp8,0,0.025472000241279602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,float16,0,0.025653332471847534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,fp8,0,0.027301333844661713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,fp8,0,0.14552000164985657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,fp8,0,0.023391999304294586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,float16,0,0.01711999997496605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,float16,0,0.01922133316596349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,float16,0,0.019071999937295914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,fp8,0,0.019194666296243668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,float16,0,0.014869333555301031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,float16,0,0.01309866706530253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,float16,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,float16,0,0.05077866713205973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,fp8,0,0.013253333667914072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,fp8,0,0.023344000180562336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,float16,0,0.012965332716703415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,float16,0,0.01301866645614306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,fp8,0,0.013461332768201828
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,float16,0,0.01320533330241839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,float16,0,0.021162666380405426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,fp8,0,0.01921066641807556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,fp8,0,0.014842666685581207
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,fp8,0,0.012576000144084295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,float16,0,0.011957333733638128
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,float16,0,0.011002667248249054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,fp8,0,0.01239466667175293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,float16,0,0.011493333925803503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,float16,0,0.01157333329319954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,float16,0,0.017269333203633625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,float16,0,0.18075199921925864
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,fp8,0,0.15065067013104758
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,fp8,0,0.16326933105786642
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,float16,0,0.19213332732518515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,float16,0,0.013274667163689932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,float16,0,0.2265333334604899
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,float16,0,0.2327679991722107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,float16,0,0.14314132928848267
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,float16,0,0.0956053336461385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,fp8,0,0.20762133598327637
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,fp8,0,0.21607466538747153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,float16,0,0.099781334400177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,fp8,0,0.10354666908582051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,fp8,0,0.08740267157554626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,float16,0,0.11495467027028401
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,float16,0,0.1197706659634908
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,fp8,0,0.10566400488217671
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,float16,0,0.053914666175842285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,fp8,0,0.046037331223487854
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,float16,0,0.07675733168919881
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,fp8,0,0.07073066631952922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,fp8,0,0.04894933104515076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,float16,0,0.06002666552861532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,fp8,0,0.056234667698542275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,float16,0,0.06203199923038483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,fp8,0,0.05821333328882853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,float16,0,0.03965333352486292
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,float16,0,0.03368533402681351
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,fp8,0,0.029018667836983997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,fp8,0,0.08096000055472057
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,fp8,0,0.14469866951306662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,float16,0,0.03572800010442734
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,fp8,0,0.034847999612490334
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,float16,0,0.03583999971548716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,fp8,0,0.035530666510264076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,float16,0,0.021146667500336964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,fp8,0,0.026202666262785595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,float16,0,0.02120000123977661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,fp8,0,0.019317333896954853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,float16,0,0.022277332842350006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,fp8,0,0.02130666623512904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,float16,0,0.05481066803137461
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,float16,0,0.023333333432674408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,float16,0,0.015599999576807022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,float16,0,0.017114666601022083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,float16,0,0.031583999594052635
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,fp8,0,0.015824000040690105
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,fp8,0,0.0174346665541331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,float16,0,0.015370666980743408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,fp8,0,0.041696002086003624
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,fp8,0,0.015658666690190632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,float16,0,0.016623999923467636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,fp8,0,0.016607999801635742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,float16,0,0.015498666713635126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,fp8,0,0.01595199977358182
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,float16,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,fp8,0,0.03107733279466629
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,float16,0,0.014032000054915747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,fp8,0,0.013157332936922709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,fp8,0,0.013338666409254074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,float16,0,0.013189333180586496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,float16,0,0.01126933346192042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,fp8,0,0.011152000476916632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,float16,0,0.01116266722480456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,float16,0,0.02364266663789749
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,float16,0,0.011050666371981302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,float16,0,0.011018666128317514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,fp8,0,0.011551999797423681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,float16,0,0.011215999722480774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,float16,0,0.012298667182525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,fp8,0,0.011221333096424738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,float16,0,0.01118933285276095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,fp8,0,0.011407999942700068
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,fp8,0,0.02160000056028366
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,float16,0,0.019637333850065868
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,float16,0,0.1702559987703959
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,float16,0,0.1760746637980143
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,fp8,0,0.13704533378283182
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,fp8,0,0.14170666535695395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,float16,0,0.01328533391157786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,float16,0,0.19455466667811075
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,fp8,0,0.1590666671593984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,fp8,0,0.16375466187795004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,float16,0,0.19549866517384848
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,float16,0,0.09252267082532246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,fp8,0,0.10415466626485188
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,fp8,0,0.07471466561158498
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,float16,0,0.09328533212343852
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,fp8,0,0.07738133271535237
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,float16,0,0.09861333171526591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,float16,0,0.1000373363494873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,fp8,0,0.08494933446248372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,fp8,0,0.0853760043780009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,float16,0,0.060458665092786155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,float16,0,0.05148266752560934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,fp8,0,0.05613866448402405
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,float16,0,0.05212266743183136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,fp8,0,0.043824002146720886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,fp8,0,0.043920000394185386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,float16,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,fp8,0,0.04851733148097992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,float16,0,0.05489600201447805
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,float16,0,0.05572799841562907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,fp8,0,0.04807466765244802
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,float16,0,0.03148266673088074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,fp8,0,0.027290667096773785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,float16,0,0.032058666149775185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,fp8,0,0.029504001140594482
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,float16,0,0.03232000023126602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,fp8,0,0.03073066721359889
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,float16,0,0.11783466736475627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,float16,0,0.019738666713237762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,float16,0,0.021274665991465252
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,float16,0,0.020869334538777668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,fp8,0,0.01926400015751521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,fp8,0,0.019066666563351948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,float16,0,0.0194560003777345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,fp8,0,0.0191040001809597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,float16,0,0.020288000504175823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,float16,0,0.015322666615247726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,float16,0,0.01526933287580808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,float16,0,0.015072000523408255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,fp8,0,0.015072000523408255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,float16,0,0.015087999403476715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,float16,0,0.03449599941571554
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,fp8,0,0.027503999571005504
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,fp8,0,0.01523200049996376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,float16,0,0.013264000415802002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,fp8,0,0.013386666774749756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,float16,0,0.01314666618903478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,fp8,0,0.011898666620254517
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,float16,0,0.01192533348997434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,fp8,0,0.011941333611806234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,fp8,0,0.012719999998807907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,float16,0,0.011871999750534693
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,float16,0,0.012970666090647379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,float16,0,0.011242666592200598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,fp8,0,0.01907733331123988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,fp8,0,0.011434666812419891
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,fp8,0,0.011354666203260422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,float16,0,0.01201066623131434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,float16,0,0.01720000058412552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,fp8,0,0.011424000064531961
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,float16,0,0.011264000087976456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,float16,0,0.011146667102972666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,float16,0,0.011205332974592844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,float16,0,0.01118933285276095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,float16,0,0.03165333221356074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,fp8,0,0.011173332730929056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,float16,0,0.01126933346192042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,fp8,0,0.012752000242471695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,0,0.015370666980743408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,0,0.015002666662136713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,0,0.019088000059127808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,float16,0,0.011098666737476984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,0,0.017423999806245167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,fp8,0,0.03536533315976461
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,float16,0,0.04577066500981649
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,0,0.02555199960867564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,fp8,0,0.033802665770053864
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,0,0.014261333892742792
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,0,0.01915733392039935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,fp8,0,0.023285334308942158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,float16,0,0.025333332518736523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,0,0.010773333410422007
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,0,0.011258666714032492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,0,0.011029332876205444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,0,0.013359999905029932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,float16,0,0.017370666066805523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,0,0.010960000256697336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,0,0.029605334003766377
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,0,0.011045332998037338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,0,0.00980266680320104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,float16,0,0.013776000589132309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,0,0.009285333255926767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,0,0.009663999701539675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,0,0.011381333072980246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,float16,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,fp8,0,0.01357866699496905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,0,0.00922133338948091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,0,0.008954666554927826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,0,0.00898133342464765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,0,0.009658666948477427
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,0,0.010879999647537867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,0,0.009354666496316591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,0,0.008896000062425932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,float16,0,0.011029332876205444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,0,0.00915733352303505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,0,0.009162666896979014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,0,0.008890666688481966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,0,0.009226666763424873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,0,0.00890666681031386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,0,0.009189333145817121
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,0,0.00915733352303505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,0,0.009162666896979014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,float16,0,0.009237333511312803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,0,0.009194666519761086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,0,0.008965333302815756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,0,0.009893333539366722
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,0,0.009178666397929192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,0,0.011029332876205444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,0,0.009205333267649015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,float16,0,0.01090666651725769
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,fp8,0,5.61192003885905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,0,0.009162666896979014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,float16,0,7.4327042897542315
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,float16,0,7.5239518483479815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,fp8,0,5.777200063069661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,0,0.00955200009047985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,float16,0,8.195637385050455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,fp8,0,6.611743927001953
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,float16,0,3.4722773234049478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,float16,0,4.664490699768066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,fp8,0,4.037109375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,fp8,0,2.880154609680176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,fp8,0,6.548095703125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,float16,0,8.12232526143392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,float16,0,3.8382240931193032
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,fp8,0,2.9533974329630532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,float16,0,3.927695910135905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,float16,0,2.054325262705485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,float16,0,1.9995892842610676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,fp8,0,1.4932427406311035
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,fp8,0,3.8004639943440757
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,float16,0,4.012144088745117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,fp8,0,3.2907946904500327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,fp8,0,2.16648530960083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,fp8,0,1.6366346677144368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,float16,0,1.9403200149536133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,fp8,0,1.6982240676879883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,float16,0,2.23907200495402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,fp8,0,1.7828853925069172
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,float16,0,0.967461347579956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,float16,0,2.0475734074910483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,fp8,0,1.0106346607208252
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,fp8,0,0.8126933574676514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,fp8,0,0.8423253695170084
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,fp8,0,0.9458293120066324
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,float16,0,1.0603413581848145
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,fp8,0,0.9268159866333008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,fp8,0,3.2735039393107095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,float16,0,4.08896001180013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,fp8,0,3.4066667556762695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,float16,0,4.1304372151692705
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,float16,0,1.0985173384348552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,float16,0,0.9932106335957845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,float16,0,1.071669340133667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,float16,0,4.719717343648274
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,float16,0,2.4394933382670083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,float16,0,2.0489706993103027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,fp8,0,1.6927146911621094
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,fp8,0,4.678810755411784
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,fp8,0,2.2016587257385254
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,float16,0,4.617381413777669
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,fp8,0,4.102746645609538
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,fp8,0,1.7636960347493489
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,fp8,0,1.9751839637756348
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,float16,0,2.2215894063313804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,fp8,0,2.0643626848856607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,float16,0,2.3454666137695312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,fp8,0,1.1831786632537842
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,float16,0,2.357429345448812
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,float16,0,1.3417387008666992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,float16,0,1.0683680375417073
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,fp8,0,0.898138682047526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,float16,0,1.1263679663340251
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,fp8,0,0.9321813583374023
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,fp8,0,1.0643626848856609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,float16,0,1.2723146279652913
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,float16,0,0.6100586652755737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,fp8,0,0.6274133523305258
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,fp8,0,0.497599999109904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,fp8,0,1.1553066571553547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,float16,0,1.1993652979532878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,float16,0,0.5847626527150472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,fp8,0,0.5775466759999593
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,float16,0,0.6583679914474487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,float16,0,0.6370506683985392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,fp8,0,0.5923306544621786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,float16,0,2.859530766805013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,fp8,0,2.350074609120687
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,float16,0,2.9722185134887695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,fp8,0,2.497978687286377
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,fp8,0,2.790154774983724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,float16,0,3.3162720998128257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,float16,0,1.8251306215922039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,fp8,0,1.6416106224060059
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,fp8,0,2.860741297403971
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,float16,0,0.668399969736735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,fp8,0,1.2790186405181885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,float16,0,3.3313118616739907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,fp8,0,0.5892373323440552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,fp8,0,1.2740800380706787
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,float16,0,1.5122826894124348
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,fp8,0,1.4831040700276692
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,float16,0,1.673967997233073
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,fp8,0,1.6146880785624187
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,float16,0,1.682736078898112
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,float16,0,0.9104693730672201
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,float16,0,0.7655519644419352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,fp8,0,0.6534186601638794
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,float16,0,0.7821439901987711
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,fp8,0,0.8633866310119629
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,fp8,0,0.7234453360239664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,float16,0,0.8806773026784261
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,fp8,0,0.8090399901072184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,float16,0,0.41950400670369464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,float16,0,0.42024000485738117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,fp8,0,0.3800319830576579
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,float16,0,0.8802293141682943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,float16,0,0.4936373233795166
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,fp8,0,0.7953120072682699
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,fp8,0,0.47599466641743976
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,fp8,0,0.3867679834365845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,float16,0,0.4750613371531169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,fp8,0,0.43645866711934406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,float16,0,0.47545599937438965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,float16,0,1.4677759806315105
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,fp8,0,0.4480319817860921
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,fp8,0,3.0483360290527344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,float16,0,3.7246507008870444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,fp8,0,3.2367626825968423
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,float16,0,3.884959856669108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,fp8,0,4.086895942687988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,float16,0,4.485232035319011
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,fp8,0,4.046127955118815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,float16,0,4.44594128926595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,float16,0,1.8562240600585938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,fp8,0,1.9508639971415203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,float16,0,2.404629389444987
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,float16,0,1.9578612645467122
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,fp8,0,2.4820052782694497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,fp8,0,1.6527573267618816
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,float16,0,2.4039840698242188
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,fp8,0,2.4190452893575034
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,float16,0,0.9625279903411865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,float16,0,1.2292266686757405
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,fp8,0,1.1881706714630127
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,fp8,0,2.2739946047465005
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,fp8,0,0.8706933657328287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,float16,0,1.0015146732330322
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,fp8,0,1.0496266682942708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,float16,0,1.158735990524292
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,float16,0,1.1527626514434814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,fp8,0,1.0937386353810628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,float16,0,0.6344000101089478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,float16,0,0.5148746569951376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,float16,0,0.5285866657892863
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,fp8,0,0.45227734247843426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,fp8,0,0.4729866584142049
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,fp8,0,0.6154719988505045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,float16,0,0.6178293228149414
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,fp8,0,0.5752426783243815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,fp8,0,0.5587093432744344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,float16,0,0.5999999841054281
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,float16,0,0.35281598567962646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,float16,0,0.28334399064381915
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,fp8,0,0.3464053471883138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,fp8,0,0.26053865750630695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,float16,0,0.2925173242886861
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,float16,0,0.32755200068155926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,float16,0,0.3360160191853841
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,float16,0,2.2899093627929688
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,fp8,0,0.319541335105896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,fp8,0,0.8734933535257975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,float16,0,2.166821320851644
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,fp8,0,1.816666603088379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,fp8,0,1.9498133659362793
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,float16,0,2.2836693127950034
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,float16,0,2.7784159978230796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,fp8,0,2.47598934173584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,float16,0,1.496496041615804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,float16,0,2.6920480728149414
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,fp8,0,1.5141654014587402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,fp8,0,2.4327200253804526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,float16,0,1.1171253522237141
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,fp8,0,0.9396959940592448
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,fp8,0,0.3060479958852132
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,float16,0,1.1692746480305989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,fp8,0,1.1537546316782634
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,fp8,0,1.3441707293192546
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,float16,0,1.4248372713724773
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,fp8,0,1.3003040154774983
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,float16,0,0.7661706606547037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,float16,0,1.3748693466186523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,float16,0,0.5786826610565186
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,fp8,0,0.5018239816029867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,float16,0,0.5996319850285848
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,fp8,0,0.7523093223571777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,float16,0,0.7292426427205404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,fp8,0,0.6650986671447754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,fp8,0,0.6617439985275269
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,float16,0,0.7441066900889078
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,fp8,0,0.2780960003534953
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,float16,0,0.31354133288065594
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,float16,0,0.4071360031763713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,fp8,0,0.4068320194880168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,float16,0,0.3232906659444173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,fp8,0,0.30214399099349976
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,float16,0,0.38308266798655194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,fp8,0,0.36319466431935626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,float16,0,0.3816959857940674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,fp8,0,0.37537066141764325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,float16,0,0.17990400393803915
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,fp8,0,0.16947199900945029
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,float16,0,0.18621333440144858
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,float16,0,0.22973867257436117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,fp8,0,0.23203200101852417
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,fp8,0,0.17589332660039267
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,fp8,0,0.2730933427810669
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,float16,0,0.21433067321777344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,fp8,0,0.20646933714548746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,float16,0,0.20231467485427856
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,fp8,0,1.7495306332906086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,float16,0,2.218239943186442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,float16,0,2.0611732800801597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,fp8,0,0.5356800158818563
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,float16,0,2.887392044067383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,fp8,0,2.7847518920898438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,float16,0,1.0359946886698406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,fp8,0,0.9011200269063314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,fp8,0,1.5790880521138508
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,float16,0,1.5811573664347331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,fp8,0,2.592533270517985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,float16,0,2.7797441482543945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,float16,0,1.1016693115234375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,fp8,0,0.20287466049194336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,fp8,0,0.9920106728871664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,float16,0,1.4619733492533367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,fp8,0,1.4563040733337402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,float16,0,1.4800960222880046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,float16,0,0.5343360106150309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,fp8,0,1.383637269337972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,float16,0,0.7936053276062012
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,fp8,0,0.8096106847127279
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,fp8,0,0.4742186864217122
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,fp8,0,1.9318079948425293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,float16,0,0.5801066557566324
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,fp8,0,0.5182293256123861
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,float16,0,0.7130666573842367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,fp8,0,0.6711520353953043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,float16,0,0.7252053419748942
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,fp8,0,0.7443573474884033
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,float16,0,0.2876960039138794
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,float16,0,0.41330134868621826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,fp8,0,0.26012800137201947
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,float16,0,0.30530667304992676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,fp8,0,0.4281280040740967
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,fp8,0,0.2845226724942525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,fp8,0,0.38787198066711426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,float16,0,0.3973226547241211
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,fp8,0,0.3678239981333415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,float16,0,0.1607039968172709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,float16,0,0.22952532768249512
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,fp8,0,0.15293332934379578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,fp8,0,0.2370133399963379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,fp8,0,0.1616213321685791
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,float16,0,0.17067732413609824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,float16,0,0.202890674273173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,fp8,0,0.19731199741363525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,fp8,0,0.21186665693918863
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,float16,0,0.21674132347106934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,float16,0,0.10549333691596985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,fp8,0,0.12988266348838806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,fp8,0,0.09959999720255534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,float16,0,0.10995200276374817
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,float16,0,0.11972266435623169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,fp8,0,0.12027200063069661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,fp8,0,0.1222826639811198
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,float16,0,0.12307733297348022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,fp8,0,1.0782559712727864
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,float16,0,1.2531466484069824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,float16,0,1.3580640157063801
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,fp8,0,1.217626651128133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,float16,0,0.3902133305867513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,float16,0,1.844938596089681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,fp8,0,1.7336479822794597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,fp8,0,1.7004213333129883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,float16,0,1.8981067339579265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,float16,0,1.01036802927653
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,float16,0,0.13636266191800436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,fp8,0,0.1051626702149709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,float16,0,0.6382826566696167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,fp8,0,0.5580906470616659
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,float16,0,0.6920106410980225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,fp8,0,1.0624266465504963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,fp8,0,0.6248533328374227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,float16,0,0.9346559842427572
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,fp8,0,0.9588960011800131
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,float16,0,0.3351893424987793
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,float16,0,0.9220053354899088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,float16,0,0.35762667655944824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,fp8,0,0.3023253281911214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,float16,0,0.5212053457895914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,fp8,0,0.8722133636474609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,float16,0,0.4733440081278483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,fp8,0,0.4641280174255371
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,float16,0,0.27738134066263836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,float16,0,0.45958932240804035
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,fp8,0,0.45549333095550537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,float16,0,0.18237332503000894
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,fp8,0,0.2924319903055827
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,fp8,0,0.1651946703592936
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,float16,0,0.1999680002530416
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,fp8,0,0.18070934216181436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,float16,0,0.25733866294225055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,fp8,0,0.24914666016896567
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,fp8,0,0.25118933121363324
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,float16,0,0.252405325571696
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,float16,0,0.10866133371988933
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,fp8,0,0.10242133339246114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,float16,0,0.11285866300264995
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,float16,0,0.1572480003039042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,fp8,0,0.16243732968966165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,fp8,0,0.13530133167902628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,fp8,0,0.13915733496348062
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,float16,0,0.08707732955614726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,float16,0,0.07276266813278198
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,fp8,0,0.0909546713034312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,fp8,0,0.06773333251476288
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,fp8,0,0.07239999870459239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,float16,0,0.07457600037256877
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,float16,0,0.08260266482830048
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,fp8,0,0.08374933401743571
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,float16,0,0.08399466673533122
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,fp8,0,0.5491413275400797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,fp8,0,0.08505066235860188
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,float16,0,1.2406400044759114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,fp8,0,1.0893706480662029
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,float16,0,1.3890719413757324
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,fp8,0,0.11027200023333232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,float16,0,0.132042666276296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,float16,0,0.1451573371887207
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,fp8,0,1.2696213722229004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,float16,0,2.1016693115234375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,fp8,0,2.0942346254984536
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,float16,0,1.1327199935913086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,float16,0,1.924330711364746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,fp8,0,0.337226668993632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,fp8,0,1.8899466196695964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,float16,0,0.6329386631647745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,fp8,0,1.2318346500396729
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,fp8,0,0.5645226637522379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,float16,0,1.049616018931071
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,float16,0,0.6914026737213135
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,fp8,0,0.6466346581776937
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,fp8,0,1.0876800219217937
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,fp8,0,1.0719892978668213
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,float16,0,1.0693546930948894
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,float16,0,0.32387733459472656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,fp8,0,0.29651200771331787
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,float16,0,0.35870933532714844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,fp8,0,0.6297119855880737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,fp8,0,0.34910400708516437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,float16,0,0.5120586554209391
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,fp8,0,0.522986650466919
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,float16,0,0.17773334185282388
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,fp8,0,0.3306986689567566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,fp8,0,0.5141493479410807
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,float16,0,0.19455466667811075
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,float16,0,0.30242133140563965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,float16,0,0.541264017422994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,fp8,0,0.1864266594250997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,float16,0,0.2779200077056885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,fp8,0,0.2902933359146118
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,fp8,0,0.2806933323542277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,float16,0,0.27297067642211914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,float16,0,0.10180266698201497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,fp8,0,0.09685333569844563
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,fp8,0,0.17948265870412192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,float16,0,0.10707733035087585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,fp8,0,0.10770666599273682
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,float16,0,0.1386133333047231
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,fp8,0,0.14028799533843994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,float16,0,0.15421866377194723
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,float16,0,0.06542933483918507
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,float16,0,0.09512533744176228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,fp8,0,0.09297600388526917
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,fp8,0,0.06047999858856201
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,float16,0,0.06830400228500366
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,fp8,0,0.06658133367697398
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,float16,0,0.08074133098125458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,fp8,0,0.08287466565767924
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,float16,0,0.08224533498287201
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,fp8,0,0.0846720039844513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,float16,0,0.5766293207804362
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,float16,0,0.0413973331451416
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,fp8,0,0.03982933362325033
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,float16,0,0.04376000165939331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,fp8,0,0.04193066557248434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,float16,0,0.04814399778842926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,fp8,0,0.16317866245905557
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,float16,0,0.04941866795221964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,fp8,0,0.05048533280690511
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,float16,0,0.166512002547582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,float16,0,0.7789813677469889
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,fp8,0,0.6993119716644287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,float16,0,0.8908747037251791
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,fp8,0,0.15527466932932535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,fp8,0,0.8311786651611328
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,float16,0,1.3638134002685547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,fp8,0,1.2756853103637695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,fp8,0,1.3180906772613525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,fp8,0,0.0540533314148585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,float16,0,1.3230559825897217
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,float16,0,0.40007468064626056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,fp8,0,0.050144001841545105
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,fp8,0,0.862394650777181
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,float16,0,0.7750720183054606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,fp8,0,0.3699893156687419
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,float16,0,0.446666677792867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,fp8,0,0.43381333351135254
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,float16,0,0.720853328704834
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,fp8,0,0.7618453502655029
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,float16,0,0.668608029683431
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,fp8,0,0.7212586402893066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,float16,0,0.39979199568430585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,fp8,0,0.19915199279785156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,fp8,0,0.4443039894104004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,fp8,0,0.23527467250823975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,fp8,0,0.36262933413187665
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,fp8,0,0.35177600383758545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,float16,0,0.34062933921813965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,fp8,0,0.23526400327682495
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,float16,0,0.21278399229049683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,fp8,0,0.10948800047238667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,float16,0,0.136053333679835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,fp8,0,0.12101333340009053
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,float16,0,0.05003199974695841
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,float16,0,0.1893440087636312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,fp8,0,0.19410133361816406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,float16,0,0.18640534083048502
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,fp8,0,0.19479467471440634
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,float16,0,0.11812266707420349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,float16,0,0.07100266714890797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,fp8,0,0.1279253363609314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,float16,0,0.07634133100509644
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,fp8,0,0.06677333513895671
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,float16,0,0.216922660668691
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,fp8,0,0.10017066200574239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,float16,0,0.09490666786829631
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,float16,0,0.24198400974273682
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,float16,0,0.35686933994293213
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,float16,0,0.10340266426404317
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,fp8,0,0.10126399993896484
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,float16,0,0.04736533264319102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,float16,0,0.06232533355553945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,float16,0,0.05036266644795736
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,fp8,0,0.04529066880544027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,fp8,0,0.06638399759928386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,fp8,0,0.048154667019844055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,float16,0,0.058149332801500954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,float16,0,0.12084266543388367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,fp8,0,0.060405333836873375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,fp8,0,0.06172266602516174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,float16,0,0.058378666639328
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,float16,0,0.03345600018898646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,fp8,0,0.0337119996547699
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,float16,0,0.03568533311287562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,float16,0,0.0391893337170283
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,fp8,0,0.039834665755430855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,fp8,0,0.04137066751718521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,float16,0,0.039647998909155525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,fp8,0,0.07537599901358287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,float16,0,0.841317335764567
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,fp8,0,0.7546133200327555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,float16,0,0.9767680168151855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,fp8,0,0.9295626481374105
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,fp8,0,0.043951998154322304
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,fp8,0,0.035455999275048576
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,float16,0,1.69376007715861
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,fp8,0,1.5506879488627117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,float16,0,1.6884320576985676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,fp8,0,1.7728907267252605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,float16,0,0.4278186559677124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,float16,0,0.930069367090861
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,fp8,0,0.396725336710612
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,fp8,0,1.0550986925760906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,float16,0,0.49582401911417645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,fp8,0,0.47722665468851727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,float16,0,0.840661366780599
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,float16,0,0.8195359706878662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,fp8,0,0.8986079692840576
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,float16,0,0.4730986754099528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,fp8,0,0.8102347056070963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,fp8,0,0.5358506838480631
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,float16,0,0.22329066197077432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,float16,0,0.2672426700592041
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,fp8,0,0.2531786759694417
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,fp8,0,0.20866133769353232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,fp8,0,0.4047040144602458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,float16,0,0.40917332967122394
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,float16,0,0.44253333409627277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,fp8,0,0.47259732087453205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,float16,0,0.0402453343073527
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,float16,0,0.1241919994354248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,fp8,0,0.1399946709473928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,float16,0,0.2468106746673584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,float16,0,0.14247999588648477
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,fp8,0,0.11471466223398845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,float16,0,0.21861332654953003
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,float16,0,0.2287893295288086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,float16,0,0.07054933408896129
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,fp8,0,0.21915199359258017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,fp8,0,0.06638933221499126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,float16,0,0.13428800304730734
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,float16,0,0.07656000057856242
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,fp8,0,0.15061333775520325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,float16,0,0.10716799894968669
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,fp8,0,0.1102133293946584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,float16,0,0.12029866377512614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,fp8,0,0.1185653309027354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,float16,0,0.04493333399295807
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,float16,0,0.07527466615041097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,fp8,0,0.04188266893227895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,fp8,0,0.0744053324063619
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,float16,0,0.04778666794300079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,fp8,0,0.048154667019844055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,float16,0,0.06038400034109751
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,fp8,0,0.06428266565004985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,float16,0,0.060821334520975746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,fp8,0,0.0663679987192154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,float16,0,0.02942933390537898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,fp8,0,0.041893333196640015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,float16,0,0.03792533278465271
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,float16,0,0.029487999776999157
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,fp8,0,0.027290667096773785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,fp8,0,0.02976000060637792
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,float16,0,0.03577066709597906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,fp8,0,0.03754666695992152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,fp8,0,0.037946666280428566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,float16,0,0.035930665830771126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,float16,0,0.025306666890780132
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,fp8,0,0.02518400053183238
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,float16,0,0.03170666595300039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,float16,0,0.027376001079877216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,fp8,0,0.027349332968393963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,float16,0,0.02958933264017105
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,fp8,0,0.27850667635599774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,fp8,0,0.030026666820049286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,fp8,0,0.21835199991861978
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,float16,0,0.029722665747006733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,fp8,0,0.03143999973932902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,fp8,0,0.0767680009206136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,float16,0,0.6421653429667155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,fp8,0,0.5998026529947916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,float16,0,0.7796213626861572
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,fp8,0,0.7660426298777262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,float16,0,1.5124053955078125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,fp8,0,0.033615998923778534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,fp8,0,1.6053439776102703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,float16,0,0.8307039737701416
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,fp8,0,0.30664000908533734
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,float16,0,1.3912213643391926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,fp8,0,1.4623146057128906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,float16,0,0.33293332656224567
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,float16,0,0.40677865346272785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,float16,0,0.7234986623128256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,fp8,0,0.7044853369394938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,float16,0,0.4227413336435954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,fp8,0,0.49371735254923504
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,float16,0,0.1795413295427958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,float16,0,0.2196000019709269
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,fp8,0,0.9647573630015055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,fp8,0,0.2142560084660848
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,fp8,0,0.16511999567349753
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,float16,0,0.7662719885508219
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,fp8,0,0.7026027043660482
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,float16,0,0.3641386826833089
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,fp8,0,0.4000053405761719
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,float16,0,0.22059732675552368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,float16,0,0.3497759898503621
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,fp8,0,0.3812640110651652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,fp8,0,0.09055999914805095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,float16,0,0.10361066460609436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,fp8,0,0.11322666207949321
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,float16,0,0.12184000015258789
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,float16,0,0.18652800718943277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,float16,0,0.19599467515945435
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,fp8,0,0.2539466619491577
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,fp8,0,0.19550933440526327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,fp8,0,0.13583466410636902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,float16,0,0.11967466274897258
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,float16,0,0.06282133360703786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,fp8,0,0.05231999854246775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,fp8,0,0.0641653339068095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,float16,0,0.05542399982611338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,float16,0,0.09895466764767964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,fp8,0,0.09664000074068706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,float16,0,0.03474666674931844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,float16,0,0.11055999994277954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,fp8,0,0.033333333830038704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,fp8,0,0.06412266691525777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,float16,0,0.03806400050719579
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,fp8,0,0.039221333960692085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,float16,0,0.049973333875338234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,fp8,0,0.35842132568359375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,fp8,0,0.05527466535568237
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,float16,0,0.051776001850763954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,fp8,0,0.05603733162085215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,float16,0,0.021312000850836437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,fp8,0,0.023381332556406658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,float16,0,0.023520000278949738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,float16,0,0.0314026673634847
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,fp8,0,0.037808001041412354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,fp8,0,0.03146666785081228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,float16,0,0.029482667644818623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,float16,0,0.029685333371162415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,fp8,0,0.03366933266321818
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,float16,0,0.01945066700379054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,fp8,0,0.01937066639463107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,float16,0,0.023226665953795116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,fp8,0,0.02741333345572154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,float16,0,0.021242665747801464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,fp8,0,0.021269333859284718
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,fp8,0,0.21547200282414755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,float16,0,0.023215999205907185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,fp8,0,0.025450666745503742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,float16,0,0.01924266666173935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,float16,0,0.023413332800070446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,fp8,0,0.021183999876181286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,fp8,0,0.019130667050679524
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,float16,0,0.021216000119845074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,float16,0,0.019071999937295914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,fp8,0,0.017642666896184284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,float16,0,0.021312000850836437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,fp8,0,0.10824533303578694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,fp8,0,0.020224000016848247
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,float16,0,0.064751997590065
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,float16,0,0.021317332983016968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,fp8,0,0.021231998999913532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,fp8,0,0.021386665602525074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,fp8,0,0.02571733295917511
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,float16,0,0.2712533275286357
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,fp8,0,0.2598506609598796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,float16,0,0.33745066324869794
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,fp8,0,0.34151466687520343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,float16,0,0.6563786665598551
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,fp8,0,0.6811359723409017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,float16,0,0.38866666952768963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,fp8,0,0.46164798736572266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,float16,0,0.14326933026313782
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,fp8,0,0.6613440116246542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,fp8,0,0.13661332925160727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,float16,0,0.6944106419881185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,float16,0,0.18559465805689493
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,fp8,0,0.3564106623331706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,float16,0,0.20147200425465903
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,fp8,0,0.18601600329081217
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,float16,0,0.07918400069077809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,float16,0,0.33879466851552326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,fp8,0,0.3468533356984456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,fp8,0,0.07469866673151652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,float16,0,0.18425599733988443
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,float16,0,0.3280319968859355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,fp8,0,0.09624000390370686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,float16,0,0.09707199533780415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,float16,0,0.11065066854159038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,float16,0,0.04576000074545542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,fp8,0,0.1777013341585795
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,fp8,0,0.04417066772778829
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,fp8,0,0.23759466409683228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,float16,0,0.16510933637619019
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,fp8,0,0.054042667150497437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,float16,0,0.051829333106676735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,fp8,0,0.08841600020726521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,fp8,0,0.1270026663939158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,float16,0,0.09177600344022115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,float16,0,0.057061334451039634
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,fp8,0,0.0581279993057251
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,float16,0,0.027386667827765148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,float16,0,0.031685332457224526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,fp8,0,0.10167466600735982
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,fp8,0,0.027317332724730175
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,fp8,0,0.04961599906285604
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,float16,0,0.09860266248385112
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,float16,0,0.043653334180514015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,fp8,0,0.03169599920511246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,fp8,0,0.05041599770387014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,float16,0,0.044112001856168113
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,float16,0,0.02739733209212621
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,float16,0,0.019237333287795384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,fp8,0,0.01915733392039935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,fp8,0,0.021381333470344543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,float16,0,0.019088000059127808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,float16,0,0.025583999852339428
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,float16,0,0.025589334468046825
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,fp8,0,0.029637334247430164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,fp8,0,0.029311999678611755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,float16,0,0.01703466723362605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,float16,0,0.021055998901526134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,fp8,0,0.017407999684413273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,fp8,0,0.015402667224407196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,float16,0,0.019738666713237762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,fp8,0,0.021173333128293354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,fp8,0,0.1897653341293335
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,fp8,0,0.021589333812395733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,float16,0,0.02124800036350886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,float16,0,0.01738133281469345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,fp8,0,0.019205333044131596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,fp8,0,0.01515199989080429
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,float16,0,0.015119999647140503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,float16,0,0.015376000354687372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,float16,0,0.017018667111794155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,fp8,0,0.017418666432301205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,fp8,0,0.015354666858911514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,float16,0,0.01706133286158244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,fp8,0,0.01725333308180173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,float16,0,0.016048000504573185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,float16,0,0.015178666760524115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,fp8,0,0.015002666662136713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,float16,0,0.014959999670584997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,fp8,0,0.015317333241303762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,float16,0,0.01522133375207583
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,fp8,0,0.03359466542800268
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,fp8,0,0.025487999121348064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,float16,0,0.017375999440749485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,float16,0,0.16713599363962808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,fp8,0,0.1583146651585897
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,float16,0,0.21170133352279663
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,fp8,0,0.20107199748357138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,float16,0,0.01526933287580808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,fp8,0,0.01716800034046173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,fp8,0,0.3468960126241048
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,float16,0,0.212336003780365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,fp8,0,0.24885332584381104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,float16,0,0.3487519820531209
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,float16,0,0.09309333562850952
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,fp8,0,0.3960426648457845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,fp8,0,0.08687999844551086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,fp8,0,0.18866666158040366
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,float16,0,0.11060800155003865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,fp8,0,0.10770666599273682
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,float16,0,0.1904053290685018
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,float16,0,0.19682133197784424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,fp8,0,0.20748267571131387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,float16,0,0.11276800433794658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,fp8,0,0.12995200355847678
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,float16,0,0.05885333319505056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,fp8,0,0.04786133269468943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,fp8,0,0.05827199916044871
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,float16,0,0.09850666920344035
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,fp8,0,0.09303466478983562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,float16,0,0.3632906675338745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,float16,0,0.03145600110292435
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,fp8,0,0.09715732932090759
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,float16,0,0.10366400082906087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,fp8,0,0.06238399942715963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,float16,0,0.059994667768478394
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,float16,0,0.03552533437808355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,fp8,0,0.029520000020662945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,fp8,0,0.03568000098069509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,float16,0,0.04630400240421295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,float16,0,0.04717866579691569
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,fp8,0,0.05193066596984863
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,float16,0,0.01921066641807556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,fp8,0,0.05406400064627329
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,fp8,0,0.01960533360640208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,fp8,0,0.03568533311287562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,float16,0,0.02942933390537898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,float16,0,0.027258666853109997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,fp8,0,0.02181866765022278
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,fp8,0,0.029680001238981884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,float16,0,0.015200000256299973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,fp8,0,0.03180799881617228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,fp8,0,0.013397333522637686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,float16,0,0.050010666251182556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,fp8,0,0.02130666623512904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,float16,0,0.015285332997639975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,float16,0,0.019258666783571243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,fp8,0,0.01545599972208341
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,float16,0,0.01743999992807706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,fp8,0,0.019189332922299702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,fp8,0,0.02082666630546252
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,float16,0,0.01782400036851565
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,fp8,0,0.013199999928474426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,fp8,0,0.017344000438849132
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,float16,0,0.013269333789745966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,float16,0,0.015066667149464289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,float16,0,0.015098666151364645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,fp8,0,0.013242666920026144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,fp8,0,0.013327999661366144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,float16,0,0.012901333471139273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,float16,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,float16,0,0.021189334491888683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,float16,0,0.02755733331044515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,fp8,0,0.013199999928474426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,float16,0,0.012879999975363413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,float16,0,0.012986666212479273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,float16,0,0.012954667210578918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,float16,0,0.01312000056107839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,float16,0,0.015210667004187902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,float16,0,0.14019733667373657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,fp8,0,0.013354666531085968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,float16,0,0.15526400009791055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,fp8,0,0.1449013352394104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,float16,0,0.23201066255569458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,fp8,0,0.248906672000885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,float16,0,0.13594133655230203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,float16,0,0.07473599910736084
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,fp8,0,0.06703466673692067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,float16,0,0.2241386572519938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,fp8,0,0.14897599816322327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,fp8,0,0.2399466633796692
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,float16,0,0.08284266789754231
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,fp8,0,0.07638399799664815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,float16,0,0.12157332897186279
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,fp8,0,0.11038399736086528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,float16,0,0.12762666742006937
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,fp8,0,0.1251626710096995
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,float16,0,0.04275199770927429
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,float16,0,0.07301866511503856
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,float16,0,0.012986666212479273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,fp8,0,0.03783999880154928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,fp8,0,0.12444266676902771
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,float16,0,0.046309332052866616
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,fp8,0,0.04364266494909922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,float16,0,0.058117335041364036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,fp8,0,0.06016000111897787
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,float16,0,0.05842133363087972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,float16,0,0.027024000883102417
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,fp8,0,0.0397173340121905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,fp8,0,0.06234666705131531
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,float16,0,0.03591466695070267
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,fp8,0,0.025381334125995636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,fp8,0,0.027301333844661713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,float16,0,0.027285332481066387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,float16,0,0.03379733363787333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,fp8,0,0.035829332967599235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,fp8,0,0.035674666364987694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,float16,0,0.03364799916744232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,float16,0,0.02161066730817159
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,fp8,0,0.025290665527184803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,fp8,0,0.017338667064905167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,float16,0,0.01912533367673556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,fp8,0,0.01899733394384384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,float16,0,0.02149333308140437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,fp8,0,0.023333333432674408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,float16,0,0.02163733293612798
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,fp8,0,0.01716800034046173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,float16,0,0.015247999380032221
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,fp8,0,0.013376000026861826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,float16,0,0.015295999745527903
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,fp8,0,0.013253333667914072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,fp8,0,0.06989866495132446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,fp8,0,0.01524266724785169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,float16,0,0.01524266724785169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,fp8,0,0.013290667285521826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,float16,0,0.013130666067202887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,fp8,0,0.013183999806642532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,float16,0,0.013253333667914072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,float16,0,0.01333333303531011
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,fp8,0,0.013162666310866674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,float16,0,0.013157332936922709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,float16,0,0.017386666188637417
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,float16,0,0.012885333349307379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,float16,0,0.011125333607196808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,fp8,0,0.011402666568756104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,float16,0,0.013269333789745966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,float16,0,0.01099733387430509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,float16,0,0.011231999844312668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,fp8,0,0.01119999960064888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,float16,0,0.011285333583752314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,float16,0,0.011354666203260422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,float16,0,0.012901333471139273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,float16,0,0.1250933309396108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,float16,0,0.012944000462690989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,fp8,0,0.10422933101654053
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,float16,0,0.1351093351840973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,fp8,0,0.023226665953795116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,float16,0,0.013306666165590286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,float16,0,0.01119999960064888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,fp8,0,0.11348266402880351
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,float16,0,0.1738133430480957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,fp8,0,0.14825600385665894
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,float16,0,0.17802133162816366
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,fp8,0,0.16859734058380127
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,float16,0,0.06884266436100006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,fp8,0,0.09404266873995464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,fp8,0,0.0581226646900177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,float16,0,0.07115200161933899
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,fp8,0,0.06376533210277557
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,float16,0,0.08357333143552144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,fp8,0,0.07884266475836436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,float16,0,0.08771199981371562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,fp8,0,0.08083733419577281
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,fp8,0,0.049925332268079124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,float16,0,0.03966933240493139
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,float16,0,0.0481333335240682
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,fp8,0,0.034048000971476235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,float16,0,0.04197333256403605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,float16,0,0.04590400060017904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,fp8,0,0.043749332427978516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,float16,0,0.047370667258898415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,fp8,0,0.045738667249679565
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,float16,0,0.023743999501069386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,fp8,0,0.014671999961137772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,fp8,0,0.029765332738558452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,float16,0,0.029706666866938274
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,float16,0,0.025610665480295818
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,fp8,0,0.02334933231274287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,float16,0,0.028250666956106823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,fp8,0,0.028970666229724884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,float16,0,0.09948800007502238
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,float16,0,0.019061333189407986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,fp8,0,0.019354666272799175
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,float16,0,0.017184000462293625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,fp8,0,0.015279999623696009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,fp8,0,0.015850666910409927
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,float16,0,0.017301333447297413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,fp8,0,0.017429333180189133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,float16,0,0.018944000204404194
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,float16,0,0.019274666905403137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,fp8,0,0.0183146670460701
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,float16,0,0.013125333935022354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,fp8,0,0.037434667348861694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,float16,0,0.012837332983811697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,float16,0,0.013253333667914072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,float16,0,0.012901333471139273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,fp8,0,0.023397333920001984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,float16,0,0.012954667210578918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,float16,0,0.029546665648619335
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,float16,0,0.013274667163689932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,float16,0,0.011418666690587997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,fp8,0,0.013242666920026144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,float16,0,0.011034666250149408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,fp8,0,0.012080000092585882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,float16,0,0.01312000056107839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,fp8,0,0.012634667257467905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,float16,0,0.011247999966144562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,fp8,0,0.011690666278203329
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,float16,0,0.011152000476916632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,float16,0,0.010944000134865442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,float16,0,0.011098666737476984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,fp8,0,0.01137599969903628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,float16,0,0.01116266722480456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,fp8,0,0.027701333165168762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,float16,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,fp8,0,0.09595200419425964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,float16,0,0.11768533786137898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,float16,0,0.01116266722480456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,float16,0,0.12051199873288472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,fp8,0,0.10223999619483948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,float16,0,0.011328000575304031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,float16,0,0.1357973317305247
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,fp8,0,0.11798399686813354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,float16,0,0.08085866769154866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,float16,0,0.1407786707083384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,fp8,0,0.12123200297355652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,fp8,0,0.06867200136184692
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,fp8,0,0.05423999826113383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,float16,0,0.06649066507816315
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,float16,0,0.07206933200359344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,fp8,0,0.05702400207519531
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,fp8,0,0.06423466900984447
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,float16,0,0.07284266750017802
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,fp8,0,0.06545066833496094
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,fp8,0,0.0317493329445521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,float16,0,0.04180799921353658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,fp8,0,0.03956799954175949
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,float16,0,0.03961600114901861
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,fp8,0,0.03386666625738144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,float16,0,0.04172799984614054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,fp8,0,0.03783999880154928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,float16,0,0.041637333730856575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,fp8,0,0.037946666280428566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,float16,0,0.023498666783173878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,float16,0,0.025648000339667004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,float16,0,0.023605334262053173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,fp8,0,0.02130666623512904
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,float16,0,0.025258667767047882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,fp8,0,0.02351466566324234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,fp8,0,0.023978665471076965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,float16,0,0.02550933261712392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,float16,0,0.06590400139490764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,float16,0,0.0170666662355264
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,float16,0,0.01718933383623759
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,fp8,0,0.015418666104475657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,fp8,0,0.015279999623696009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,float16,0,0.016890666137139004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,float16,0,0.017279999951521557
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,fp8,0,0.015370666980743408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,float16,0,0.0378506655494372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,fp8,0,0.016762666404247284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,float16,0,0.017055999487638474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,float16,0,0.013424000392357508
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,float16,0,0.01292266696691513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,float16,0,0.013258667041858038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,fp8,0,0.021381333470344543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,float16,0,0.013082666943470636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,fp8,0,0.025445332129796345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,float16,0,0.01293333371480306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,fp8,0,0.011328000575304031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,float16,0,0.013248000293970108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,float16,0,0.012853333105643591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,fp8,0,0.011173332730929056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,float16,0,0.013023999830087027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,float16,0,0.011221333096424738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,fp8,0,0.011488000551859537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,float16,0,0.011557333171367645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,float16,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,float16,0,0.011333333949247995
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,fp8,0,0.011445333560307821
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,float16,0,0.012543999900420507
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,fp8,0,0.011231999844312668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,float16,0,0.011183999478816986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,float16,0,0.011061333119869232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,fp8,0,0.011887999872366587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,float16,0,0.01128000020980835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,fp8,0,0.01129066695769628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,float16,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,float16,0,0.01129066695769628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,0,0.013061333447694778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,fp8,0,0.011215999722480774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,0,0.015002666662136713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,0,0.01321600005030632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,float16,0,0.011600000162919363
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,0,0.01924266666173935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,fp8,0,0.011087999989589056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,0,0.01724799970785777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,0,0.02943466603755951
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,float16,0,0.02941333254178365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,0,0.011087999989589056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,0,0.012053333222866058
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,fp8,0,0.02550933261712392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,0,0.01434133326013883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,0,0.00966933307548364
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,float16,0,0.019674666225910187
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,0,0.019487999379634857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,fp8,0,0.017290666699409485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,0,0.009072000160813332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,0,0.011045332998037338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,0,0.011215999722480774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,0,0.013274667163689932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,0,0.008943999807039896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,float16,0,0.01422400027513504
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,0,0.009216000015536943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,0,0.010890666395425797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,0,0.01044800008336703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,0,0.01118933285276095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,0,0.00891733355820179
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,fp8,0,0.011343999455372492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,0,0.025519999365011852
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,float16,0,0.010928000013033548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,0,0.01119999960064888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,0,0.009152000149091085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,0,0.009242666885256767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,0,0.009658666948477427
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,0,0.009183999771873156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,0,0.00914666677514712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,float16,0,0.010239999741315842
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,0,0.01770666614174843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,0,0.009125333279371262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,0,0.008954666554927826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,0,0.009103999783595404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,0,0.00949866697192192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,0,0.009877333417534828
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,float16,0,0.008879999940594038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,0,0.009296000003814697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,fp8,0,0.010175999874869982
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,0,0.008943999807039896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,0,0.009077333534757296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,0,0.008943999807039896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,0,0.009359999870260557
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,float16,0,0.009114666531483332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,0,0.00921066664159298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,fp8,0,0.009306666751702627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,0,0.009242666885256767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,0,0.009114666531483332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,0,0.009082666908701261
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,0,0.009114666531483332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,0,0.009253333633144697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,0,0.00919999989370505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,0,0.009253333633144697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,0,0.00919999989370505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,float16,0,0.009488000224033991
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,0,0.009301333377758661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,fp8,0,0.009269333134094873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,0,0.008912000184257826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,0,0.009232000137368837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,0,0.009109333157539368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,0,0.00961599995692571
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,fp8,0,4.327439943949382
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,float16,0,5.569674809773763
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,fp8,0,4.504117329915364
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,float16,0,5.638383865356445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,0,0.00921066664159298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,float16,0,3.1371679306030273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,float16,0,2.7214558919270835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,fp8,0,2.79207452138265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,fp8,0,3.4462401072184243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,float16,0,2.7698774337768555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,fp8,0,5.362794876098633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,float16,0,6.343514760335286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,fp8,0,2.3248160680135093
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,fp8,0,1.1839093367258708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,float16,0,1.4108959833780925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,fp8,0,1.5099786122639973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,float16,0,1.6652587254842122
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,float16,0,1.4772639274597168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,float16,0,3.092159907023112
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,fp8,0,3.067040125528971
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,fp8,0,1.2340319951375325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,float16,0,0.778602679570516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,fp8,0,0.8295893669128418
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,fp8,0,1.4391093254089355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,fp8,0,0.7172106901804606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,float16,0,0.7856319745381674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,float16,0,0.8707946936289469
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,fp8,0,0.7671039899190267
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,fp8,0,2.618293285369873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,float16,0,3.1176160176595054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,float16,0,3.2185386021931968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,fp8,0,2.7062772115071616
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,float16,0,3.7003520329793296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,float16,0,1.5964639981587727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,float16,0,2.0698506037394204
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,fp8,0,3.113247871398926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,fp8,0,2.3133066495259604
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,float16,0,0.8782026767730713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,float16,0,1.589903990427653
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,fp8,0,0.6767306327819824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,float16,0,1.6602293650309246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,fp8,0,1.4579680760701497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,float16,0,0.8274239699045817
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,float16,0,1.8434826532999675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,fp8,0,0.949173370997111
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,float16,0,1.011679967244466
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,fp8,0,0.7458773454030355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,fp8,0,1.920149326324463
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,fp8,0,0.7589866320292155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,float16,0,0.9647146860758463
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,fp8,0,0.8801600138346354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,float16,0,0.4569653272628784
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,float16,0,0.5500320196151733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,fp8,0,0.535103996594747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,float16,0,0.4641813437143962
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,fp8,0,0.4249653418858846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,float16,0,0.537066658337911
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,fp8,0,0.4769706726074219
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,float16,0,2.218719959259033
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,fp8,0,1.8390827178955078
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,float16,0,2.2681333223978677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,fp8,0,1.952031930287679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,fp8,0,1.3917279243469238
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,float16,0,2.6678667068481445
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,float16,0,1.137221336364746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,fp8,0,0.966650644938151
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,fp8,0,1.3713439305623372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,fp8,0,2.5109705924987793
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,float16,0,1.1933759848276775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,float16,0,0.8585333029429117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,fp8,0,1.024234692255656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,fp8,0,0.4098026752471924
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,fp8,0,1.2567946910858154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,float16,0,0.7621813615163168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,float16,0,0.6029920180638632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,fp8,0,0.7236479918162028
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,fp8,0,0.5260586738586426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,float16,0,0.6302666664123535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,fp8,0,0.5694719950358073
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,float16,0,0.7268640200297037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,fp8,0,0.6743520100911459
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,float16,0,0.4116213321685791
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,float16,0,0.33747732639312744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,fp8,0,0.4084159930547078
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,fp8,0,0.3102026581764221
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,float16,0,0.3465706507364909
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,fp8,0,0.3242933352788289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,float16,0,0.39561601479848224
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,fp8,0,0.3654773235321045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,float16,0,1.4129014015197754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,fp8,0,2.4050985972086587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,float16,0,2.87775452931722
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,float16,0,2.97159481048584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,fp8,0,2.582848072052002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,float16,0,1.3661333719889324
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,float16,0,3.746272087097168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,fp8,0,1.2450453440348308
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,float16,0,1.5765652656555176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,float16,0,1.9267733891805012
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,float16,0,1.5268425941467285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,fp8,0,3.4333438873291016
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,fp8,0,1.3845760027567546
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,float16,0,0.769536018371582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,fp8,0,0.6611040035883585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,fp8,0,0.9688373406728109
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,float16,0,1.0267893473307292
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,fp8,0,1.6668160756429036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,float16,0,1.8399839401245117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,float16,0,0.8062933286031088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,float16,0,0.4096693197886149
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,fp8,0,0.7514346440633138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,float16,0,0.9395039876302084
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,fp8,0,0.36850666999816895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,fp8,0,0.5230613152186075
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,fp8,0,0.9091573556264242
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,fp8,0,0.3929866552352905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,float16,0,0.5071200132369995
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,fp8,0,0.497162659962972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,float16,0,0.2382133404413859
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,fp8,0,0.294325331846873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,float16,0,0.29732799530029297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,fp8,0,0.22025066614151
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,float16,0,0.24336532751719156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,fp8,0,0.23243733247121176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,float16,0,0.26904000838597614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,fp8,0,0.26580266157786053
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,float16,0,1.6743680636088054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,fp8,0,1.4517547289530437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,float16,0,1.791744073232015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,fp8,0,2.0701866149902344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,fp8,0,1.5893279711405437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,float16,0,2.317189375559489
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,fp8,0,0.753712018330892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,float16,0,0.8723680178324381
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,float16,0,1.226591984430949
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,fp8,0,1.3159786860148113
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,fp8,0,2.2061492602030435
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,float16,0,0.9228800137837728
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,fp8,0,0.8281493186950684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,float16,0,0.5307946602503458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,float16,0,0.42612266540527344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,float16,0,0.4596373240152995
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,float16,0,0.6474506855010986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,fp8,0,0.6508640050888062
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,fp8,0,0.41160531838734943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,float16,0,0.4875839948654175
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,fp8,0,0.4448213179906209
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,float16,0,0.6073013146718343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,float16,0,0.24939733743667603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,fp8,0,0.5798453489939371
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,fp8,0,0.2339199980099996
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,float16,0,0.34490132331848145
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,float16,0,0.26784000794092816
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,fp8,0,0.24672534068425497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,float16,0,0.3338133494059245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,fp8,0,0.3054400086402893
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,float16,0,0.20503467321395874
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,fp8,0,0.19515732924143472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,fp8,0,0.15150933464368185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,float16,0,0.15798399845759073
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,float16,0,0.16368533174196878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,fp8,0,0.16031466921170553
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,fp8,0,0.18333866198857626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,float16,0,0.18366400400797525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,fp8,0,1.4186773300170898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,fp8,0,1.6025333404541016
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,float16,0,1.7438613573710124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,float16,0,2.481562614440918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,fp8,0,1.1204640070597331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,float16,0,1.2961440086364746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,fp8,0,0.3508319854736328
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,float16,0,0.8355466524759928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,fp8,0,2.4473013877868652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,fp8,0,1.354405403137207
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,float16,0,0.892736037572225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,fp8,0,0.741487979888916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,fp8,0,0.8255306879679362
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,float16,0,1.2511520385742188
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,fp8,0,1.1374346415201824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,float16,0,0.4296533266703288
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,float16,0,0.667797327041626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,fp8,0,0.6986506779988607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,fp8,0,0.3909279902776082
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,float16,0,0.4712640047073364
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,fp8,0,0.4411840041478475
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,float16,0,1.6113440195719402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,float16,0,0.6335306564966837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,float16,0,0.23547732830047607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,fp8,0,0.600389321645101
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,fp8,0,0.3729226589202881
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,float16,0,0.35412800312042236
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,fp8,0,0.2185759941736857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,float16,0,1.1560320059458415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,fp8,0,0.2388426661491394
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,fp8,0,0.3484799861907959
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,fp8,0,0.20479466517766318
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,float16,0,0.1458506683508555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,fp8,0,0.13201600313186646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,float16,0,0.19966399669647217
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,fp8,0,0.14272532860438028
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,float16,0,0.17380799849828085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,fp8,0,0.17635732889175415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,float16,0,0.09107200304667155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,fp8,0,0.11226666967074077
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,float16,0,0.11032000184059143
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,fp8,0,0.0848479966322581
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,float16,0,0.09488532940546672
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,fp8,0,0.09134399890899658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,float16,0,0.10641599694887798
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,fp8,0,0.10947199662526448
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,float16,0,1.0887733300526936
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,fp8,0,0.8889280160268148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,fp8,0,1.0183839797973633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,float16,0,1.5797227223714192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,fp8,0,0.9256319999694824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,float16,0,0.5137973229090372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,fp8,0,1.4303520520528157
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,float16,0,0.8638666470845541
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,float16,0,0.256442666053772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,float16,0,0.1388746698697408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,fp8,0,0.46805866559346515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,fp8,0,0.5365333159764608
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,float16,0,0.5602506796518961
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,float16,0,0.7925919691721598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,fp8,0,0.7949386437733968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,float16,0,0.26972800493240356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,fp8,0,0.25757332642873126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,float16,0,0.30156266689300537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,fp8,0,0.48079466819763184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,fp8,0,0.285098671913147
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,float16,0,0.4215306838353475
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,fp8,0,0.4325653314590454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,float16,0,0.15032000343004862
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,float16,0,0.24249066909154257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,fp8,0,0.14501866698265076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,float16,0,0.16614400347073874
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,float16,0,0.2279413342475891
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,float16,0,0.9901653130849203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,fp8,0,0.2187839945157369
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,float16,0,0.14061333735783896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,float16,0,0.09722666939099629
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,fp8,0,0.09116266171137492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,float16,0,0.101583997408549
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,fp8,0,0.09925867120424907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,float16,0,0.12010133266448975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,fp8,0,0.12475732962290446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,float16,0,0.07098133365313213
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,float16,0,0.05825600028038025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,fp8,0,0.07652799785137177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,fp8,0,0.055973331133524575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,float16,0,0.3415626684824626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,float16,0,0.0603359987338384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,fp8,0,0.059664001067479454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,float16,0,0.06853866577148438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,fp8,0,0.07194666564464569
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,float16,0,0.4470133384068807
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,float16,0,1.0050666332244873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,fp8,0,0.9257866541544596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,fp8,0,0.2590293288230896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,float16,0,1.1405493418375652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,fp8,0,1.0952266852060955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,fp8,0,0.15741866827011108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,float16,0,1.854570706685384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,fp8,0,0.13595733046531677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,float16,0,0.5796746810277303
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,fp8,0,1.0861546993255615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,float16,0,0.5125013192494711
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,fp8,0,0.48262933890024823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,float16,0,0.9825279712677002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,float16,0,0.8984533150990804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,fp8,0,0.5643146832784017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,fp8,0,1.9416426022847493
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,fp8,0,0.8814773559570312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,float16,0,0.2691413362820943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,fp8,0,0.25546133518218994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,float16,0,0.30481066306432086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,fp8,0,0.5578720172246298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,fp8,0,0.3002026677131653
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,fp8,0,0.4640959898630778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,float16,0,0.1518186628818512
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,float16,0,0.2691199978192647
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,fp8,0,0.29346134265263873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,fp8,0,0.16238933801651
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,float16,0,0.24427199363708496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,fp8,0,0.26917866865793866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,float16,0,0.14883200327555338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,float16,0,0.08873066306114197
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,fp8,0,0.15890133380889893
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,fp8,0,0.08534399668375652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,float16,0,0.09578667084376018
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,fp8,0,0.09735999504725139
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,float16,0,0.12533332904179892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,fp8,0,0.13014400005340576
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,float16,0,0.05940799911816915
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,float16,0,0.07824533184369405
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,fp8,0,0.08444799979527791
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,fp8,0,0.0562666654586792
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,float16,0,0.06243733565012614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,float16,0,0.07437866429487865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,fp8,0,0.07874666651089986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,float16,0,0.5068053404490153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,float16,0,0.4596000115076701
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,float16,0,0.050255998969078064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,fp8,0,0.05421333511670431
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,float16,0,0.04165866722663244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,fp8,0,0.04160533348719279
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,float16,0,0.043738668163617454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,fp8,0,0.04394666850566864
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,float16,0,0.049135997891426086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,float16,0,0.16796799500783285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,fp8,0,0.05180799961090088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,float16,0,0.6363093455632528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,fp8,0,0.6008640130360922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,float16,0,1.271557331085205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,fp8,0,0.7397226492563883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,fp8,0,1.136240005493164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,float16,0,0.33882665634155273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,float16,0,0.6861759821573893
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,fp8,0,0.3168693383534749
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,fp8,0,0.769973357518514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,fp8,0,0.06286933521429698
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,fp8,0,0.3863840103149414
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,float16,0,0.3871413469314575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,float16,0,0.6569013198216757
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,fp8,0,0.6167253255844116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,fp8,0,0.3978453477223714
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,float16,0,0.3561760187149048
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,float16,0,0.18453333775202432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,fp8,0,0.1770240068435669
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,float16,0,0.21234132846196493
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,fp8,0,0.20997333526611328
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,fp8,0,0.1405333379904429
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,float16,0,0.3351786533991496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,float16,0,0.10252267122268677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,fp8,0,0.34842665990193683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,float16,0,0.1179039975007375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,fp8,0,0.09914132952690125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,float16,0,0.19246933857599893
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,fp8,0,0.10921600461006165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,float16,0,0.17964265743891397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,fp8,0,0.16953599452972412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,fp8,0,0.061093335350354515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,float16,0,0.06408533453941345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,float16,0,0.06846933563550313
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,fp8,0,0.06926399966080983
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,fp8,0,0.10519466797510783
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,float16,0,0.10705600182215373
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,float16,0,0.08602666854858398
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,fp8,0,0.09373333056767781
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,float16,0,0.038975998759269714
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,float16,0,0.7476800282796224
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,float16,0,0.05194133520126343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,fp8,0,0.05842133363087972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,float16,0,0.04162666698296865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,fp8,0,0.04191466669241587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,float16,0,0.0499839981396993
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,fp8,0,0.03364799916744232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,float16,0,0.035973332822322845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,fp8,0,0.04382933179537455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,fp8,0,0.03583999971548716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,fp8,0,0.04193066557248434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,float16,0,0.03972266614437103
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,float16,0,0.693669319152832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,fp8,0,0.6667253176371256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,float16,0,0.8315146764119467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,fp8,0,0.21171200275421143
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,fp8,0,0.8413653373718262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,fp8,0,0.03777066618204117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,float16,0,0.035546667873859406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,float16,0,0.04174399872620901
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,float16,0,1.562272071838379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,fp8,0,1.6682186126708984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,float16,0,0.3630559841791789
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,fp8,0,0.952293316523234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,float16,0,0.8323946793874105
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,fp8,0,0.35095465183258057
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,float16,0,0.42819201946258545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,fp8,0,0.4885813395182292
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,fp8,0,0.43854931990305585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,float16,0,0.7793333530426025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,float16,0,0.1950933337211609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,fp8,0,0.18794665733973184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,float16,0,0.23567465941111246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,fp8,0,0.23081066211064658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,float16,0,0.38997864723205566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,fp8,0,0.39454933007558185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,float16,0,0.22526933749516806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,fp8,0,0.05420266588528951
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,fp8,0,0.2553439935048421
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,float16,0,0.10948800047238667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,float16,0,0.1302880048751831
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,fp8,0,0.12051733334859212
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,float16,0,0.2136746644973755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,fp8,0,0.23040533065795898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,float16,0,0.12398933370908101
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,float16,0,0.06457599997520447
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,fp8,0,0.06247999767462412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,float16,0,0.07057600220044453
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,fp8,0,0.07251733541488647
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,float16,0,0.0942133367061615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,fp8,0,0.10656533638636272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,float16,0,0.061237335205078125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,float16,0,0.4283999999364217
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,float16,0,0.04228800038496653
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,fp8,0,0.06835199892520905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,fp8,0,0.04186666508515676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,float16,0,0.04613333443800608
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,float16,0,0.05804799993832906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,fp8,0,0.06420266628265381
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,fp8,0,0.04644800225893656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,fp8,0,0.7607626914978027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,float16,0,0.03793599953254064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,float16,0,0.029317334294319153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,fp8,0,0.029365333418051403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,float16,0,0.03170666595300039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,fp8,0,0.04193066557248434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,float16,0,0.037685332198937736
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,fp8,0,0.03958400090535482
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,float16,0,0.027642667293548584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,float16,0,0.03142400085926056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,fp8,0,0.03358400116364161
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,fp8,0,0.027269333600997925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,float16,0,0.02942933390537898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,fp8,0,0.029322666426499683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,float16,0,0.03154666721820831
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,fp8,0,0.03203733265399933
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,fp8,0,0.10136000315348308
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,fp8,0,0.13593600193659464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,float16,0,0.5546346505482992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,fp8,0,0.5391146739323934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,float16,0,0.6933546861012777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,fp8,0,0.716543992360433
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,float16,0,0.7560160160064697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,float16,0,1.4169599215189617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,fp8,0,0.8819626967112223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,fp8,0,1.5438772837320964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,fp8,0,0.03160533308982849
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,fp8,0,0.2860959966977437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,float16,0,0.3688053290049235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,fp8,0,0.36773868401845294
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,float16,0,0.15772266189257303
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,fp8,0,0.4527946710586548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,fp8,0,0.15438933173815408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,fp8,0,0.8030186494191488
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,float16,0,0.1941493352254232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,float16,0,0.7016479969024658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,float16,0,0.38760534922281903
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,fp8,0,0.34306132793426514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,float16,0,0.35501333077748615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,fp8,0,0.23480532566706339
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,fp8,0,0.08116800089677174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,float16,0,0.2035306692123413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,float16,0,0.10902933279673259
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,float16,0,0.09030933181444804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,fp8,0,0.10514133175214131
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,float16,0,0.18363199631373087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,float16,0,0.11106666922569275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,float16,0,0.05029866596062978
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,fp8,0,0.05004799862702688
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,fp8,0,0.12436800201733907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,fp8,0,0.06123200058937073
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,float16,0,0.0817493349313736
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,fp8,0,0.09422399600346883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,float16,0,0.05225066840648651
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,fp8,0,0.03384000062942505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,float16,0,0.03385599950949351
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,fp8,0,0.06157866617043813
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,float16,0,0.03793066740036011
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,fp8,0,0.03977599988381068
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,float16,0,0.0498986691236496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,fp8,0,0.05615466833114624
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,fp8,0,0.19519466161727905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,float16,0,0.0317546675602595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,float16,0,0.023215999205907185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,fp8,0,0.03758399933576584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,fp8,0,0.02521066615978877
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,fp8,0,0.02348800003528595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,float16,0,0.025663999219735462
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,float16,0,0.03142400085926056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,fp8,0,0.03352533280849457
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,float16,0,0.021333334346612293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,float16,0,0.025546667476495106
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,fp8,0,0.021290667355060577
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,float16,0,0.023221333821614582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,fp8,0,0.023599999646345775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,float16,0,0.025333332518736523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,float16,0,0.2948373357454936
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,fp8,0,0.02731200059254964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,float16,0,0.021194666624069214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,fp8,0,0.20683199167251587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,float16,0,0.02345066765944163
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,float16,0,0.05806399881839752
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,float16,0,0.021157334248224895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,float16,0,0.02327999969323476
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,fp8,0,0.023178666830062866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,float16,0,0.2448319991429647
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,fp8,0,0.02784000088771184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,fp8,0,0.24882133801778158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,fp8,0,0.3344533443450928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,float16,0,0.3228000005086263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,fp8,0,0.021226666867733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,fp8,0,0.023232000569502514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,fp8,0,0.628544012705485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,float16,0,0.3652426799138387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,fp8,0,0.13268799583117166
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,float16,0,0.1323199967543284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,fp8,0,0.4309600194295247
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,float16,0,0.1758240063985189
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,fp8,0,0.17352533340454102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,float16,0,0.193557341893514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,fp8,0,0.22619199752807617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,float16,0,0.6239626804987589
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,float16,0,0.3181653420130412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,fp8,0,0.07259200016657512
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,fp8,0,0.3260106643040975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,fp8,0,0.11748799681663513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,float16,0,0.1027946670850118
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,float16,0,0.07714666426181793
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,float16,0,0.1808639963467916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,fp8,0,0.09648000200589497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,fp8,0,0.04394666850566864
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,float16,0,0.04379733403523763
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,fp8,0,0.054272000988324486
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,fp8,0,0.08797333637873332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,float16,0,0.0777706652879715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,fp8,0,0.19579199949900308
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,float16,0,0.029717333614826202
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,float16,0,0.04790399968624115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,fp8,0,0.05797866483529409
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,fp8,0,0.02977599948644638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,float16,0,0.03403199960788091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,fp8,0,0.03583999971548716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,float16,0,0.04577066500981649
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,fp8,0,0.052469333012898765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,float16,0,0.021312000850836437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,fp8,0,0.02109866589307785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,float16,0,0.029301332930723827
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,float16,0,0.021354667842388153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,fp8,0,0.02346666653951009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,float16,0,0.0946613351504008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,float16,0,0.027674667537212372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,fp8,0,0.03158933420976003
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,float16,0,0.017312000195185345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,float16,0,0.023242667317390442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,float16,0,0.01930133377512296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,fp8,0,0.019567999988794327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,float16,0,0.021674667795499165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,fp8,0,0.017184000462293625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,fp8,0,0.023183998962243397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,float16,0,0.01718933383623759
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,fp8,0,0.017263999829689663
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,fp8,0,0.020442667106787365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,float16,0,0.01932266727089882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,float16,0,0.01924266666173935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,fp8,0,0.019487999379634857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,float16,0,0.017103999853134155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,float16,0,0.017637333522240322
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,fp8,0,0.017360000560681026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,float16,0,0.017173333714405697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,fp8,0,0.017397332936525345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,float16,0,0.017370666066805523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,float16,0,0.050213331977526345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,fp8,0,0.033728001018365227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,float16,0,0.14846932888031006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,fp8,0,0.14455999930699667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,float16,0,0.1910933256149292
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,fp8,0,0.025274666647116344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,float16,0,0.017312000195185345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,fp8,0,0.19301867485046387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,float16,0,0.1967466672261556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,fp8,0,0.22904000679651895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,float16,0,0.34088532129923504
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,float16,0,0.08290666838486989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,fp8,0,0.07691200077533722
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,float16,0,0.10065066814422607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,fp8,0,0.10051199793815613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,float16,0,0.10577600200970967
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,float16,0,0.18574933211008707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,float16,0,0.04645333190759023
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,fp8,0,0.2054133415222168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,fp8,0,0.0465280016263326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,fp8,0,0.12035733461380005
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,float16,0,0.053946668903032936
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,fp8,0,0.057392001152038574
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,float16,0,0.08474666873613994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,fp8,0,0.09105599919954936
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,float16,0,0.047184000412623085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,fp8,0,0.05630933245023092
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,fp8,0,0.027621333797772724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,float16,0,0.03233066697915395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,fp8,0,0.03383466601371765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,float16,0,0.04381333291530609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,fp8,0,0.05065600077311198
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,fp8,0,0.033930666744709015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,float16,0,0.02941333254178365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,float16,0,0.022639999787012737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,fp8,0,0.023365333676338196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,float16,0,0.02829866607983907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,fp8,0,0.03172266731659571
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,fp8,0,0.3435893456141154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,float16,0,0.015200000256299973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,float16,0,0.01923199991385142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,fp8,0,0.015205333630243937
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,float16,0,0.019039999693632126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,fp8,0,0.019487999379634857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,float16,0,0.01632533346613248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,float16,0,0.013162666310866674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,fp8,0,0.013663999736309052
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,float16,0,0.015578666081031164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,float16,0,0.013343999783198038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,fp8,0,0.014101333916187286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,float16,0,0.015290666371583939
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,fp8,0,0.015493333339691162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,float16,0,0.01341333364446958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,float16,0,0.013189333180586496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,fp8,0,0.01340266689658165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,fp8,0,0.013493333011865616
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,float16,0,0.02938133229811986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,float16,0,0.013157332936922709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,fp8,0,0.013237333546082178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,float16,0,0.013183999806642532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,fp8,0,0.014015999933083853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,float16,0,0.01314666618903478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,float16,0,0.02141333371400833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,float16,0,0.013280000537633896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,fp8,0,0.02128000060717265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,fp8,0,0.021183999876181286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,float16,0,0.11523733536402385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,fp8,0,0.01725333308180173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,fp8,0,0.10176533460617065
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,float16,0,0.1312320033709208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,fp8,0,0.1218239963054657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,float16,0,0.2148639957110087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,fp8,0,0.23062400023142496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,float16,0,0.06203199923038483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,fp8,0,0.05901333192984263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,fp8,0,0.1318186620871226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,float16,0,0.06780800223350525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,float16,0,0.12098133563995361
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,fp8,0,0.07061333457628886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,float16,0,0.10437333583831787
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,float16,0,0.013290667285521826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,float16,0,0.056613331039746605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,fp8,0,0.1030560036500295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,fp8,0,0.062463998794555664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,fp8,0,0.03572800010442734
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,float16,0,0.04071466624736786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,fp8,0,0.04159466673930486
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,float16,0,0.05209066470464071
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,float16,0,0.02347733328739802
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,fp8,0,0.05807999769846598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,fp8,0,0.023584000766277313
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,float16,0,0.03169066707293192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,float16,0,0.025258667767047882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,fp8,0,0.03349866718053818
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,float16,0,0.0315786674618721
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,fp8,0,0.023962666591008503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,float16,0,0.021359999974568684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,float16,0,0.0191040001809597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,fp8,0,0.019519999623298645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,float16,0,0.021221332252025604
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,fp8,0,0.023418667415777843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,float16,0,0.015216000378131866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,float16,0,0.013056000073750814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,float16,0,0.015125333021084467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,float16,0,0.03750933210055033
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,fp8,0,0.015237333873907724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,float16,0,0.012954667210578918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,fp8,0,0.0359199990828832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,float16,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,float16,0,0.013162666310866674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,float16,0,0.01735466718673706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,float16,0,0.013125333935022354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,float16,0,0.013082666943470636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,fp8,0,0.013327999661366144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,float16,0,0.012442667037248611
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,fp8,0,0.012175999581813812
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,float16,0,0.013183999806642532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,float16,0,0.01129066695769628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,fp8,0,0.01524266724785169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,fp8,0,0.013301332791646322
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,fp8,0,0.012229333321253458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,float16,0,0.013253333667914072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,float16,0,0.011999999483426413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,fp8,0,0.013253333667914072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,float16,0,0.01099733387430509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,float16,0,0.0988106628259023
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,float16,0,0.1036959985891978
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,fp8,0,0.09529067079226176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,fp8,0,0.025557334224383037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,float16,0,0.14845333496729532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,float16,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,float16,0,0.08250133196512859
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,fp8,0,0.0765173335870107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,float16,0,0.05412800113360087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,fp8,0,0.0480320006608963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,fp8,0,0.1276746690273285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,float16,0,0.05811200042565664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,fp8,0,0.05495999753475189
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,float16,0,0.07066133121649425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,float16,0,0.033530667424201965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,fp8,0,0.04398933549722036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,fp8,0,0.07047999898592631
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,float16,0,0.03378133227427801
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,fp8,0,0.03245333333810171
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,float16,0,0.04009066770474116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,fp8,0,0.0415040006240209
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,float16,0,0.02552533398071925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,float16,0,0.021168000996112823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,fp8,0,0.019152000546455383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,fp8,0,0.025834667185942333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,float16,0,0.02213866760333379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,float16,0,0.02531733363866806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,fp8,0,0.08624533812204997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,float16,0,0.017407999684413273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,fp8,0,0.019402666638294857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,float16,0,0.01533866673707962
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,fp8,0,0.015253332753976187
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,float16,0,0.016069332758585613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,fp8,0,0.016000000139077503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,float16,0,0.017386666188637417
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,fp8,0,0.017375999440749485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,float16,0,0.013023999830087027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,float16,0,0.013061333447694778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,fp8,0,0.013306666165590286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,float16,0,0.013034666577974955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,fp8,0,0.01328533391157786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,float16,0,0.011610666910807291
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,float16,0,0.04127999891837438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,float16,0,0.011231999844312668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,fp8,0,0.02978666623433431
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,float16,0,0.011114666859308878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,float16,0,0.011264000087976456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,fp8,0,0.011503999431928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,float16,0,0.01179733375708262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,fp8,0,0.011328000575304031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,float16,0,0.011231999844312668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,fp8,0,0.025216000775496166
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,float16,0,0.011157333850860596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,float16,0,0.010954666882753372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,fp8,0,0.01184533288081487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,float16,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,float16,0,0.011077333241701126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,fp8,0,0.011343999455372492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,float16,0,0.011231999844312668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,float16,0,0.09288000067075093
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,fp8,0,0.07679466903209686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,float16,0,0.09565333525339763
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,float16,0,0.011578666667143503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,fp8,0,0.08285333216190338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,fp8,0,0.05638400216897329
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,float16,0,0.061797335743904114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,float16,0,0.10843732953071594
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,float16,0,0.05184000233809153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,fp8,0,0.09872532884279887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,fp8,0,0.021402666966120403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,fp8,0,0.04400533437728882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,float16,0,0.05415999889373779
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,fp8,0,0.04769066472848257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,fp8,0,0.05420800050099691
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,fp8,0,0.034330666065216064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,float16,0,0.03572266548871994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,float16,0,0.03162133445342382
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,fp8,0,0.027402666707833607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,float16,0,0.03340800106525421
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,fp8,0,0.02922133356332779
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,float16,0,0.035760000348091125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,fp8,0,0.033573334415753685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,float16,0,0.022831998765468597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,float16,0,0.02125866711139679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,fp8,0,0.019120000302791595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,float16,0,0.021151999632517498
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,fp8,0,0.019354666272799175
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,float16,0,0.021877333521842957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,fp8,0,0.021189334491888683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,float16,0,0.01551466683546702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,float16,0,0.01563199982047081
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,fp8,0,0.015461333096027374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,float16,0,0.01551466683546702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,float16,0,0.011306667079528173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,float16,0,0.012015999605258306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,float16,0,0.013173333058754602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,float16,0,0.011237333218256632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,fp8,0,0.011343999455372492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,float16,0,0.01101333275437355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,float16,0,0.011130666981140772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,fp8,0,0.011354666203260422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,float16,0,0.011007999380429586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,fp8,0,0.011258666714032492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,float16,0,0.06033066908518473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,float16,0,0.011077333241701126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,float16,0,0.011168000598748526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,fp8,0,0.011328000575304031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,float16,0,0.011077333241701126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,fp8,0,0.011231999844312668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,float16,0,0.01119999960064888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,fp8,0,0.011813333878914515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,float16,0,0.01097600037852923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,float16,0,0.011002667248249054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,float16,0,0.010901333143313726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,float16,0,0.011215999722480774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,float16,0,0.01571200042963028
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,0,0.012960000584522883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,0,0.014570667097965876
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,float16,0,0.02346666653951009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,0,0.019199999670187633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,0,0.01747200017174085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,fp8,0,0.02125866711139679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,fp8,0,0.012096000214417776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,0,0.010981333752473196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,0,0.013898666948080063
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,0,0.01098666712641716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,fp8,0,0.015413332730531693
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,float16,0,0.01709866647919019
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,0,0.009685333197315535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,float16,0,0.01331199953953425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,0,0.008997333546479544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,fp8,0,0.013269333789745966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,0,0.011120000233252844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,0,0.009258666386206945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,0,0.009189333145817121
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,0,0.011029332876205444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,0,0.010949333508809408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,float16,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,0,0.009125333279371262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,0,0.009119999905427298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,0,0.009248000259200731
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,0,0.009685333197315535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,0,0.009189333145817121
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,0,0.009232000137368837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,float16,0,0.010591999938090643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,0,0.0102613332370917
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,0,0.00898133342464765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,0,0.00915733352303505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,0,0.009568000212311745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,0,0.010879999647537867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,0,0.00890666681031386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,0,0.009754666437705358
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,0,0.00919999989370505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,0,0.0107893335322539
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,fp8,0,0.009632000078757605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,float16,0,0.009328000247478485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,0,0.009269333134094873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,0,0.009242666885256767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,float16,0,0.009114666531483332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,0,0.009216000015536943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,fp8,0,0.01452800010641416
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,0,0.008949333180983862
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,0,0.00914666677514712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,0,0.008879999940594038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,0,0.009269333134094873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,float16,0,0.009392000113924345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,fp8,0,0.009786666681369146
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,0,0.009018666421373686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,0,0.009136000027259191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,0,0.00884799969693025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,0,0.009514666472872099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,0,0.009093333035707474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,fp8,0,2.9374720255533853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,float16,0,3.6146186192830405
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,0,0.009125333279371262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,float16,0,3.6936267217000327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,fp8,0,3.124842643737793
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,0,0.009237333511312803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,float16,0,2.2193493843078613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,2.570138613382975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,float16,0,1.8624374071757
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,fp8,0,1.540826638539632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,float16,0,4.440181414286296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,fp8,0,4.220800081888835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,fp8,0,0.009706666693091393
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,float16,0,1.9249119758605957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,fp8,0,2.0196266174316406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,fp8,0,1.9040212631225586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,float16,0,2.292074680328369
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,1.2149813175201416
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,1.9738133748372395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,0,0.010938666760921478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,fp8,0,0.8309280077616373
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,float16,0,1.1026506423950195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,float16,0,2.290730635325114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,float16,0,1.0251466433207195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,fp8,0,0.9454826513926188
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,fp8,0,1.0339306990305583
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,float16,0,1.1750293572743733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,0.59716268380483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,float16,0,1.1434400081634521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,1.0807573000590007
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,float16,0,0.5477759838104248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,fp8,0,0.4824959834416707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,float16,0,0.5594559907913208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,fp8,0,0.498741348584493
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,float16,0,0.645957350730896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,fp8,0,0.5815786520640055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,0.5950293143590292
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,float16,0,0.6502933502197266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,fp8,0,1.7459200223286946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,float16,0,2.1070399284362793
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,fp8,0,1.8783200581868489
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,float16,0,2.196938673655192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,fp8,0,2.3182239532470703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,float16,0,2.6793600718180337
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,float16,0,1.07915202776591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,float16,0,1.1672320365905762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,1.3546454111735027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,float16,0,1.4087680180867512
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,float16,0,1.1479520003000896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,float16,0,1.3936907450358074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,float16,0,0.633072018623352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,1.3085760275522869
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,fp8,0,1.1998559633890789
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,float16,0,1.4076587359110515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,float16,0,0.734666665395101
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,fp8,0,0.5053439935048422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,float16,0,0.5887680053710938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,float16,0,0.615669329961141
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,fp8,0,0.5480053424835205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,float16,0,0.7312640349070231
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,fp8,0,0.669274648030599
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,float16,0,0.7260106404622396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,0.7004160086313883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,0.3880000114440918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,float16,0,0.4045279820760091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,fp8,0,0.30636266867319745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,float16,0,0.34377066294352215
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,fp8,0,0.32013867298762005
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,float16,0,0.40068801244099933
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,fp8,0,0.37431466579437256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,float16,0,0.40487468242645264
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,fp8,0,1.0274559656778972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,float16,0,1.5056799252827961
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,fp8,0,1.2567466894785564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,float16,0,1.5660373369852703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,0.6752746899922689
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,fp8,0,1.386309305826823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,float16,0,1.9809600512186687
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,fp8,0,1.73689603805542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,0.947551965713501
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,float16,0,0.3335626522699992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,float16,0,0.7830453713734945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,fp8,0,0.6803627014160156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,float16,0,0.8366613388061523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,fp8,0,0.9387146631876627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,fp8,0,0.7363573710123698
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,float16,0,1.01310396194458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,fp8,0,0.9452319939931234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,0.3954453468322754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,float16,0,1.012330691019694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,0.517685333887736
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,0.9428533713022867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,float16,0,0.4181813398996989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,fp8,0,0.3755413293838501
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,float16,0,0.4532533486684163
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,fp8,0,0.4026933511098226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,fp8,0,0.5300373236338297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,float16,0,0.5467413266499838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,float16,0,0.5374293327331543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,0.5168480078379313
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,0.3011733293533325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,float16,0,0.25464532772699994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,float16,0,0.3231253425280253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,fp8,0,0.25126399596532184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,fp8,0,0.2935306628545125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,float16,0,0.323472003142039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,0.3065386613210042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,float16,0,0.9932373364766439
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,float16,0,1.9602079391479492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,fp8,0,1.6475626627604167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,float16,0,2.09114662806193
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,fp8,0,1.816794713338216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,float16,0,0.5391306479771932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,fp8,0,2.403007984161377
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,1.375322659810384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,float16,0,1.3649439811706543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,float16,0,2.920602798461914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,float16,0,1.0012799898783367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,fp8,0,0.23728533585866293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,float16,0,0.26336000363032025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,fp8,0,0.8607839743296305
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,float16,0,0.3003679911295573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,float16,0,1.0672159989674885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,fp8,0,0.976261297861735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,fp8,0,1.2195146878560383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,float16,0,1.4303627014160156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,float16,0,0.5237173239390055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,fp8,0,0.47180267175038654
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,0.6747626463572184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,float16,0,0.7119680245717367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,float16,0,0.5649173259735107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,fp8,0,0.5167573293050131
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,1.3772320747375488
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,float16,0,0.7141173680623373
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,fp8,0,0.6991786956787109
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,float16,0,0.29149333635965985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,0.6922933260599772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,0.3798559904098511
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,float16,0,0.7411039670308431
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,fp8,0,0.27035200595855713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,float16,0,0.3107733329137166
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,fp8,0,0.28442132472991943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,float16,0,0.4003680149714152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,float16,0,0.3893119891484578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,float16,0,0.22977600495020548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,0.3763680060704549
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,float16,0,0.1862773299217224
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,0.22139734029769897
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,fp8,0,0.17522666851679483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,fp8,0,0.1840320030848185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,float16,0,0.19214399655659994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,fp8,0,0.2178879976272583
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,float16,0,0.21709867318471274
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,float16,0,0.23410133520762125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,0.22218666474024454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,float16,0,1.1403573354085286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,fp8,0,1.0007253487904866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,float16,0,1.2439626852671306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,fp8,0,1.1306719779968262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,float16,0,1.3462986946105957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,fp8,0,1.557360013326009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,float16,0,1.773087978363037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,float16,0,0.6000053485234579
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,0.913370688756307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,float16,0,0.3846026659011841
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,fp8,0,0.5305973291397095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,float16,0,0.6467146476109823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,fp8,0,0.5978879928588867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,fp8,0,0.38279998302459717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,float16,0,0.8781173229217529
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,fp8,0,0.9300906658172607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,0.4596853256225586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,float16,0,0.32636799414952594
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,float16,0,0.8736053307851156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,fp8,0,0.28890132904052734
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,0.8510560194651285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,float16,0,0.35156798362731934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,fp8,0,0.3278613289197286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,float16,0,0.481114665667216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,fp8,0,0.44702935218811035
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,float16,0,0.4730293353398641
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,0.45934398969014484
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,float16,0,0.1867199937502543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,float16,0,0.2577386697133382
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,0.2582933306694031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,fp8,0,0.17625067631403604
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,fp8,0,0.1901973287264506
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,float16,0,0.19940799474716187
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,float16,0,0.24843732515970865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,fp8,0,0.24292800823847452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,0.25546665986378986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,float16,0,0.12310399611790974
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,0.15065067013104758
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,float16,0,0.1481653352578481
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,fp8,0,0.11435733238855998
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,float16,0,0.1267359952131907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,fp8,0,0.12381333112716675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,float16,0,0.14482667048772177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,float16,0,0.8957546552022299
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,fp8,0,0.1488533318042755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,0.15034133195877075
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,float16,0,0.14644267161687216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,float16,0,1.1128106911977131
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,float16,0,1.241925319035848
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,fp8,0,1.1541279951731365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,fp8,0,0.9678880373636881
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,float16,0,0.46378131707509357
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,float16,0,1.9608532587687175
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,float16,0,0.5743306477864584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,fp8,0,2.0003040631612143
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,fp8,0,0.5133866469065348
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,1.0216320355733235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,float16,0,1.0071893533070881
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,fp8,0,0.6041333278020223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,fp8,0,1.0336106618245442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,float16,0,0.9869333108266195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,float16,0,0.9173173109690348
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,float16,0,0.4810026486714681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,1.0214880307515461
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,0.4985440174738566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,float16,0,0.3020586570103963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,fp8,0,0.2796213428179423
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,float16,0,0.25598933299382526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,float16,0,0.34297064940134686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,fp8,0,0.3248213330904643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,float16,0,0.4869493246078491
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,fp8,0,0.504741350809733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,float16,0,0.2608853379885356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,float16,0,0.16909333070119223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,0.4986240069071452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,0.27763734261194867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,float16,0,0.18818666537602743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,fp8,0,0.17457600434621176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,fp8,0,0.2676266630490621
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,float16,0,0.27851200103759766
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,0.27613333861033124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,float16,0,0.2696053385734558
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,float16,0,0.15499732891718546
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,float16,0,0.10979732871055603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.15026666720708212
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,fp8,0,0.10354666908582051
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,float16,0,0.1163146694501241
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,fp8,0,0.1143839955329895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,fp8,0,0.14819199840227762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,float16,0,0.15362667044003805
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.15180800358454385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,float16,0,0.08506666620572408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,float16,0,0.6447626749674479
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,float16,0,0.06863466898600261
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,float16,0,0.07108266651630402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,fp8,0,0.07070933282375336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,float16,0,0.08428800106048584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,fp8,0,0.08739733695983887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.08898666501045227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,float16,0,0.08528533577919006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,float16,0,0.5234880050023397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,fp8,0,0.1613759994506836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,float16,0,0.6814613342285156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,fp8,0,0.6129120190938314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,fp8,0,0.7502666314442953
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,float16,0,0.7816212972005209
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,float16,0,1.2794293562571208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,float16,0,0.14169599612553915
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,0.6375306844711304
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,float16,0,0.6357386509577433
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.08875200152397156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,float16,0,0.3548320134480794
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,fp8,0,0.32712000608444214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,float16,0,0.40387733777364093
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,fp8,0,0.3991573254267375
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,float16,0,0.6353653271993002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,fp8,0,0.6070666710535685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,float16,0,0.6279946565628052
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,0.692741314570109
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,0.3436853488286336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,float16,0,0.19686933358510336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,fp8,0,0.17698132991790771
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,float16,0,0.22411733865737915
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,fp8,0,0.21767467260360718
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,float16,0,0.3418666521708171
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,fp8,0,0.3452959855397542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,float16,0,0.18528000513712564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,0.3832266728083293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,0.1911626656850179
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,float16,0,0.11335466305414836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,fp8,0,0.109525332848231
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,fp8,0,0.12396267056465149
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,float16,0,0.18764267365137735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,fp8,0,0.17541333039601645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,fp8,0,1.1709493001302083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,float16,0,0.18453333775202432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,0.2023680011431376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,float16,0,0.10051199793815613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,float16,0,0.07573866844177246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.1079360047976176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,fp8,0,0.07218133409818013
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,fp8,0,0.0800853321949641
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,float16,0,0.07960000137488048
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,float16,0,0.09841600060462952
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,fp8,0,0.10613866647084554
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.10687999924023946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,float16,0,0.09975467125574748
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,float16,0,0.06624533236026764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,float16,0,0.05425066749254862
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,fp8,0,0.05223466455936432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,float16,0,0.3457546631495158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,float16,0,0.056048000852266945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,fp8,0,0.056032001972198486
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,float16,0,0.06519466638565063
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,fp8,0,0.068271999557813
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,float16,0,0.06634133557478587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.07012266914049785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,float16,0,0.3327946662902832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,float16,0,0.6909173329671224
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,fp8,0,0.6411413351694742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,float16,0,0.12337600191434224
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,float16,0,0.8254079818725586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,fp8,0,0.8076053460439047
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,float16,0,1.552336057027181
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,0.8418560028076172
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,float16,0,0.7874933083852133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,float16,0,0.36241598924001056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,fp8,0,0.3386666774749756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,fp8,0,1.663973331451416
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,float16,0,0.4244266748428345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,fp8,0,0.42767465114593506
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,float16,0,0.7587093512217203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.07034666836261749
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,fp8,0,0.7290773391723633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,float16,0,0.704207976659139
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,float16,0,0.19370667139689127
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,0.7437919775644938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,fp8,0,0.18422933419545492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,float16,0,0.4116213321685791
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,float16,0,0.22694933414459229
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,fp8,0,0.2310453255971273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,fp8,0,0.4034666617711385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,float16,0,0.378490686416626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,float16,0,0.36929066975911456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,0.4470986525217692
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,float16,0,0.20824533700942993
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,float16,0,0.105621337890625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,fp8,0,0.10507200161616008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,0.2095359961191813
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,float16,0,0.1275200049082438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,float16,0,0.21357333660125732
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,fp8,0,0.21082133054733276
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,0.20888533194859824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,float16,0,0.12147200107574463
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.11249599854151408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,fp8,0,0.06619733572006226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,float16,0,0.07683733105659485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,fp8,0,0.06512000163396199
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,float16,0,0.10090667009353638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,fp8,0,0.10973333319028218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,float16,0,0.11191466450691223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.11415466666221619
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,float16,0,0.05898666878541311
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.0650186687707901
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,float16,0,0.04292800029118856
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,fp8,0,0.041637333730856575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,0.4443519910176595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,float16,0,0.046021332343419395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,fp8,0,0.04783466458320618
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,float16,0,0.05819733440876007
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,fp8,0,0.06434666613737743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,float16,0,0.058650667468706764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.06635733445485432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,float16,0,0.04701866706212362
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.049653331438700356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,fp8,0,0.11972799897193909
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,float16,0,0.0396373321612676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,fp8,0,0.037871999045213066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,float16,0,0.19794134298960367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,fp8,0,0.03990400085846583
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,float16,0,0.06885866820812225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,float16,0,0.04614933331807455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,fp8,0,0.04801600178082784
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,float16,0,0.046053335070610046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.04993066688378652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,fp8,0,0.07644799848397572
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,float16,0,0.4472693204879761
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,fp8,0,0.4177066485087077
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,float16,0,0.5492586692174276
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,fp8,0,0.5479733149210612
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,float16,0,1.0518666903177898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,fp8,0,0.9667946497599283
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,0.6051466862360636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,float16,0,0.5517813364664713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,float16,0,0.2898293336232503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,fp8,0,0.2887306610743205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,fp8,0,0.22658133506774902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,float16,0,0.5348373254140218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,fp8,0,0.5036373138427734
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,float16,0,0.5511893431345621
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,float16,0,0.13321600357691446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,0.3085920015970866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,fp8,0,0.12388799587885539
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,float16,0,0.2696426709493001
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,float16,0,0.039749334255854286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,0.5914506514867147
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,fp8,0,0.15973866979281107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,float16,0,0.1622880001862844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,float16,0,0.2812426686286926
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,fp8,0,0.2945760091145833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,float16,0,0.27110399802525836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,float16,0,0.1453013320763906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,0.2851146658261617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,float16,0,0.07681066791216533
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.156442662080129
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,float16,0,0.08518933256467183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,fp8,0,0.0867680013179779
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,float16,0,0.1460479994614919
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.1563040018081665
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,float16,0,0.14667200048764548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,float16,0,0.07514133552710216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,float16,0,0.04970666766166687
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.08342400193214417
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,fp8,0,0.04794133206208547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,float16,0,0.05509866774082184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,fp8,0,0.05607999861240387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,float16,0,0.07234666744867961
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,fp8,0,0.08090666433175404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,float16,0,0.07342933118343353
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.082997332016627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,float16,0,0.035605333745479584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.0518453319867452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,float16,0,0.04894400139649709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,float16,0,0.037791999677817024
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,fp8,0,0.033861334125200905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,float16,0,0.04577599962552389
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,fp8,0,0.050186668833096824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,float16,0,0.23873066902160645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.040250666439533234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.052005335688591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,float16,0,0.031685332457224526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,fp8,0,0.031541332602500916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,float16,0,0.033557333052158356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,fp8,0,0.03363200028737386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,float16,0,0.03766933331886927
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,fp8,0,0.03952533255020777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,float16,0,0.037978666524092354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.039962666730086006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,fp8,0,0.07397866745789845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,fp8,0,0.13896000385284424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,float16,0,0.48556800683339435
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,fp8,0,0.46086398760477704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,float16,0,0.6204853455225626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,fp8,0,0.6418666839599609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,fp8,0,0.037802666425704956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,float16,0,0.6229653358459473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,float16,0,0.25939200321833294
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,0.7425813674926758
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,float16,0,0.04734933376312256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,float16,0,1.2300960222880046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,fp8,0,1.4697866439819336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,fp8,0,0.24905065695444742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,float16,0,0.3242293397585551
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,fp8,0,0.3393333355585734
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,float16,0,0.6701813538869222
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,fp8,0,0.6324373483657837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,0.6644213199615479
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,float16,0,0.35356799761454266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,0.39662400881449383
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,float16,0,0.1808533271153768
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,float16,0,0.14103999733924866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,fp8,0,0.1815413236618042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,fp8,0,0.13341866930325827
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,float16,0,0.6853333314259847
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,float16,0,0.32496533791224164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,fp8,0,0.36051734288533527
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.20081067085266113
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,float16,0,0.17435733477274576
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,float16,0,0.3262346585591634
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,0.3919680118560791
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,float16,0,0.07758933305740356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,float16,0,0.03756800045569738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,float16,0,0.0962559978167216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,fp8,0,0.07670933504899342
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,fp8,0,0.09086933732032776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,float16,0,0.1739893356959025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.18428800503412882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,fp8,0,0.17824000120162964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,float16,0,0.16698133945465088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,float16,0,0.0479360024134318
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.09435733159383138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,float16,0,0.0993173321088155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,float16,0,0.05629866818586985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,fp8,0,0.058101331194241844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,float16,0,0.0795360008875529
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,fp8,0,0.09100799759229024
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.09506666660308838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,float16,0,0.09668266773223877
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,fp8,0,0.029466666281223297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.05385600030422211
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,float16,0,0.033904001116752625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,fp8,0,0.03573866685231527
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,float16,0,0.04587199787298838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,fp8,0,0.05192000170548757
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,float16,0,0.0468746672074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.03751999884843826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,float16,0,0.027045334378878277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,float16,0,0.03565866748491923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,fp8,0,0.027274665733178455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,float16,0,0.027621333797772724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,fp8,0,0.029370665550231934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,float16,0,0.033520000676314034
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,fp8,0,0.03756800045569738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,float16,0,0.0349386657277743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.03772799919048945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,float16,0,0.02940266579389572
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,float16,0,0.02548266698916753
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,float16,0,0.027327999472618103
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,fp8,0,0.025477332373460133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,float16,0,0.029466666281223297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,fp8,0,0.029466666281223297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.029658667743206024
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,float16,0,0.02938133229811986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,fp8,0,0.04816000163555145
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,float16,0,0.030389333764712017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,float16,0,0.04610133171081543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.05435200035572052
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.031354665756225586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,fp8,0,0.025429333249727886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,float16,0,0.39933331807454425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,fp8,0,0.3780053456624349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,float16,0,0.5350773334503174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,fp8,0,0.5541386604309082
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,fp8,0,0.20510933796564737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,float16,0,0.2162826657295227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,float16,0,1.1679200331370037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,0.6225173473358154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,float16,0,0.5879146655400594
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,float16,0,0.2889066735903422
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,fp8,0,0.29262399673461914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,fp8,0,1.2783839702606201
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.37114667892456055
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,float16,0,0.6248319943745931
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,0.6232213179270426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,float16,0,0.1151039997736613
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,float16,0,0.6003733476003011
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,float16,0,0.2975626587867737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,float16,0,0.15154666701952615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,fp8,0,0.3063039978345235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.32655467589696247
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,fp8,0,0.1532853345076243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,float16,0,0.15849066774050394
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.16668800512949625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,fp8,0,0.061706667145093284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,float16,0,0.3087306618690491
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,float16,0,0.29948266347249347
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,float16,0,0.07987200220425923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,fp8,0,0.0759093314409256
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,fp8,0,0.10912000139554341
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,float16,0,0.06177600224812826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,float16,0,0.16876800855000815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,fp8,0,0.5512266556421915
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.08462933699289958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.16661866505940756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,float16,0,0.15666666626930237
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,float16,0,0.0863146682580312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,float16,0,0.03932799895604452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,fp8,0,0.14816000064214072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,float16,0,0.045781334241231285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,fp8,0,0.04924266537030538
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.08470400174458821
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,float16,0,0.08949333429336548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,float16,0,0.07063999772071838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,fp8,0,0.037632000943024956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.04780800143877665
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,float16,0,0.023200000325838726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,fp8,0,0.029493334392706554
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,fp8,0,0.0233599990606308
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,float16,0,0.03959999978542328
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,fp8,0,0.04573333263397217
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,float16,0,0.039664000272750854
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,float16,0,0.029066666960716248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.03139200061559677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,fp8,0,0.021029333273569744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,float16,0,0.02147199958562851
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,float16,0,0.020874666670958202
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,fp8,0,0.023381332556406658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,fp8,0,0.03146666785081228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,float16,0,0.027445333699385326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,float16,0,0.02942933390537898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.03183466692765554
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,float16,0,0.023290666441122692
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,float16,0,0.01924266666173935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.025386666258176167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,float16,0,0.01985599969824155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,fp8,0,0.02090666691462199
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,fp8,0,0.023418667415777843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,fp8,0,0.08282666901747386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,float16,0,0.0233599990606308
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,float16,0,0.03953066716591517
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.025274666647116344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,float16,0,0.02086399992307027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.021397332350413006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,fp8,0,0.018394666413466137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,float16,0,0.02757866680622101
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,float16,0,0.019306667149066925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,float16,0,0.019343999524911244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,fp8,0,0.019434666881958645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.04780266682306925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,float16,0,0.02033599962790807
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,fp8,0,0.021354667842388153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,float16,0,0.17365866899490356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,fp8,0,0.1712053418159485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,float16,0,0.24397865931193033
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,fp8,0,0.26241066058476764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,float16,0,0.019141333798567455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,float16,0,0.3118240038553874
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.30557332436243695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,float16,0,0.09524800380071004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.02126399924357732
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,float16,0,0.5627733469009399
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,fp8,0,0.5647199948628744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,fp8,0,0.09270933270454407
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,fp8,0,0.13588266571362814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,float16,0,0.13050666451454163
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,float16,0,0.2810506621996562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,fp8,0,0.32062933842341107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,float16,0,0.14229866862297058
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,float16,0,0.27993067105611164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.30167466402053833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.15939199924468994
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,float16,0,0.05183466772238413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,fp8,0,0.053301334381103516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,float16,0,0.023370665808518726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,fp8,0,0.06786133348941803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,float16,0,0.07100800176461537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,float16,0,0.14709333578745523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,float16,0,0.07904533545176189
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.15893333156903586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,float16,0,0.03162133445342382
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,float16,0,0.15873066584269205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,fp8,0,0.031514666974544525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.07857066889603932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,fp8,0,0.16480533281962076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,fp8,0,0.042709335684776306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,fp8,0,0.07649600009123485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,float16,0,0.03924266745646795
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,float16,0,0.06232533355553945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.07751466830571492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,float16,0,0.07649066547552745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,float16,0,0.019509332875410717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,float16,0,0.03701333453257879
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,fp8,0,0.02124800036350886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,float16,0,0.023631999890009563
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,fp8,0,0.027109332382678986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,float16,0,0.035589332381884255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,fp8,0,0.04363733530044556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.0458186666170756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,float16,0,0.025285333395004272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,float16,0,0.035775999228159584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.029482667644818623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,fp8,0,0.017429333180189133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,float16,0,0.019167999426523846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,fp8,0,0.0194560003777345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,float16,0,0.025381334125995636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,fp8,0,0.027493332823117573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,float16,0,0.02532800038655599
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,float16,0,0.019359999646743137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,float16,0,0.015205333630243937
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,fp8,0,0.015306666493415833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,float16,0,0.017045332739750545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,fp8,0,0.017349333812793095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,float16,0,0.019386666516462963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,fp8,0,0.021216000119845074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,float16,0,0.019440000255902607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.021456000705560047
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,float16,0,0.017029333859682083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,float16,0,0.015087999403476715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.017322666943073273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,fp8,0,0.014981333166360855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,float16,0,0.015061333775520325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,fp8,0,0.015290666371583939
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,float16,0,0.017114666601022083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,fp8,0,0.017349333812793095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.017423999806245167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,float16,0,0.017456000049908955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,float16,0,0.01509333277742068
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.04558399816354116
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.015439999600251516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,float16,0,0.015247999380032221
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,float16,0,0.01532799998919169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,fp8,0,0.015087999403476715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,float16,0,0.015306666493415833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,fp8,0,0.015274666249752045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,float16,0,0.017397332936525345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.015279999623696009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,float16,0,0.01580799991885821
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.02941333254178365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.02145066608985265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,fp8,0,0.10293866197268169
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,float16,0,0.10743467013041179
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,float16,0,0.1485973298549652
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,float16,0,0.1502133309841156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,fp8,0,0.15076266725858053
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,float16,0,0.2912213404973348
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,float16,0,0.05569066603978475
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,fp8,0,0.2892586588859558
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,fp8,0,0.05630399783452352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,float16,0,0.07658666869004567
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,fp8,0,0.07039466500282288
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,float16,0,0.16180800398190817
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,fp8,0,0.1679146687189738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,float16,0,0.07880533238252004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.1613759994506836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,float16,0,0.14403200149536133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,float16,0,0.03421333432197571
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,fp8,0,0.03497066597143809
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,float16,0,0.04167466859022776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,fp8,0,0.04606399933497111
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,float16,0,0.06637333333492279
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,fp8,0,0.07851733267307281
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.08063999811808269
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,float16,0,0.08495466907819112
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,float16,0,0.03754133234421412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.04595733185609182
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,float16,0,0.02141333371400833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,fp8,0,0.021407999098300934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,float16,0,0.02568000058333079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,fp8,0,0.027552001178264618
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,fp8,0,0.043968002001444496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.04571199913819631
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,float16,0,0.0376800000667572
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.1609333356221517
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,float16,0,0.015285332997639975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.02722666660944621
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,float16,0,0.02333866556485494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,float16,0,0.017077332983414333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,float16,0,0.023242667317390442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,fp8,0,0.025333332518736523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,fp8,0,0.01782400036851565
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.027274665733178455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,float16,0,0.017375999440749485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,fp8,0,0.013359999905029932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,float16,0,0.01505600040157636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,float16,0,0.017269333203633625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,float16,0,0.017103999853134155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.01932266727089882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,float16,0,0.01523200049996376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.07995733122030894
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,float16,0,0.013002666334311167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.015135999768972397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,float16,0,0.013237333546082178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,float16,0,0.01504533365368843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,fp8,0,0.01525866612792015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,float16,0,0.015008000036080679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.01533866673707962
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,float16,0,0.013104000439246496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,float16,0,0.01292266696691513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,float16,0,0.03759466608365377
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,float16,0,0.013306666165590286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,float16,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.01328533391157786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,float16,0,0.013274667163689932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,float16,0,0.023381332556406658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,float16,0,0.01328533391157786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,fp8,0,0.01321600005030632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.019573333362738293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,float16,0,0.01320533330241839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,fp8,0,0.07444266478220622
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,float16,0,0.10034666458765666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,fp8,0,0.09066133697827657
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,float16,0,0.18557866414388022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,float16,0,0.0952959954738617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,fp8,0,0.18773865699768066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.0884320040543874
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,float16,0,0.04595200220743815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,fp8,0,0.04391466577847799
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,float16,0,0.05295466880003611
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,fp8,0,0.05397333204746246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,float16,0,0.0772213339805603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,fp8,0,0.08738666772842407
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,float16,0,0.09807466467221577
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,float16,0,0.04507733384768168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.08919999996821086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.050000001986821495
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,float16,0,0.027632000545660656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,fp8,0,0.027263998985290527
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,float16,0,0.03142933299144109
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,float16,0,0.043893332282702126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,fp8,0,0.033610666791598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,fp8,0,0.04979733129342397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,float16,0,0.04369066655635834
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,float16,0,0.027189334233601887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.050186668833096824
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.029520000020662945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,fp8,0,0.01905599981546402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,float16,0,0.019445333629846573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,float16,0,0.02143999934196472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,fp8,0,0.021130666136741638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,float16,0,0.025407999753952026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,float16,0,0.08346133430798848
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,fp8,0,0.029461334149042766
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,float16,0,0.013056000073750814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.02938666691382726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,float16,0,0.01522133375207583
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,float16,0,0.017018667111794155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,fp8,0,0.019141333798567455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,float16,0,0.017055999487638474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.019365333020687103
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,float16,0,0.015263999501864115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,float16,0,0.013056000073750814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,float16,0,0.013221333424250284
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,fp8,0,0.013248000293970108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,float16,0,0.012981332838535309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,fp8,0,0.015013333410024643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,float16,0,0.015040000279744467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.015087999403476715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,float16,0,0.011221333096424738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,float16,0,0.013061333447694778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,float16,0,0.011578666667143503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,float16,0,0.012741333494583765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,float16,0,0.011312000453472137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,float16,0,0.01109333336353302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,fp8,0,0.011328000575304031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,float16,0,0.011205332974592844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,fp8,0,0.011429333438475927
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,float16,0,0.011994666109482447
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,float16,0,0.011157333850860596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,float16,0,0.011205332974592844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,float16,0,0.02736533433198929
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,float16,0,0.01743999992807706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.01232533281048139
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,float16,0,0.011055999745925268
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,float16,0,0.011306667079528173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,float16,0,0.011413333316644033
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,fp8,0,0.01126933346192042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,float16,0,0.011882666498422623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,float16,0,0.012944000462690989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,float16,0,0.07329066594441731
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,fp8,0,0.06248533229033152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,float16,0,0.07852266728878021
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.019306667149066925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,fp8,0,0.07249600191911061
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,float16,0,0.05740800003210703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.06010133524735769
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,float16,0,0.041536000867684685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,fp8,0,0.10690133770306905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,fp8,0,0.03782399992148081
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,float16,0,0.044778664906819664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,fp8,0,0.04194133480389913
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,float16,0,0.05628266433874766
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,fp8,0,0.05841066439946493
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,float16,0,0.05663466453552246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,float16,0,0.025333332518736523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.035461333890755974
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,float16,0,0.03389333436886469
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,fp8,0,0.024570666253566742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.06037333110968272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,float16,0,0.027488000690937042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,fp8,0,0.034128000338872276
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,float16,0,0.03350399931271871
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,float16,0,0.033674667278925575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.03577066709597906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,float16,0,0.021365332106749218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,float16,0,0.0169813334941864
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,float16,0,0.01876266673207283
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,fp8,0,0.017711999515692394
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,float16,0,0.02144533395767212
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,fp8,0,0.02180800090233485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,float16,0,0.021312000850836437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.0230880007147789
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.01525866612792015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,float16,0,0.01320533330241839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,float16,0,0.015040000279744467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,float16,0,0.10476266344388326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,float16,0,0.015210667004187902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,float16,0,0.011087999989589056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,float16,0,0.013157332936922709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,float16,0,0.011050666371981302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,float16,0,0.011141333729028702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,fp8,0,0.011274666835864386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,float16,0,0.013034666577974955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.01257066677014033
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,float16,0,0.01098666712641716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,fp8,0,0.028597332537174225
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.011802667131026586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,float16,0,0.011338666081428528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,fp8,0,0.01137599969903628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,float16,0,0.011258666714032492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.011258666714032492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,float16,0,0.01109333336353302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,float16,0,0.011274666835864386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,float16,0,0.01128000020980835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,float16,0,0.015029333531856537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,fp8,0,0.011061333119869232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,float16,0,0.011130666981140772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,float16,0,0.012058666596810022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,float16,0,0.01098666712641716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,float16,0,0.011642667154471079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.015002666662136713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,float16,0,0.011205332974592844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,fp8,0,0.011413333316644033
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,float16,0,0.011701333026091257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.011370666325092316
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,float16,0,0.011157333850860596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.02313599983851115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,fp8,0,0.05604266623655955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,float16,0,0.06656533479690552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,float16,0,0.07062933345635732
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.01157333329319954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,float16,0,0.01126933346192042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,float16,0,0.04727466901143392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,fp8,0,0.062362665931383766
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,float16,0,0.08179200192292531
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.04585599899291992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,fp8,0,0.07945600152015686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,fp8,0,0.012245333443085352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,fp8,0,0.03363733241955439
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,float16,0,0.040362666050593056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,float16,0,0.046053335070610046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,fp8,0,0.04574933151404063
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,float16,0,0.046522667010625206
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.04553600152333578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,float16,0,0.02741866558790207
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.02829866607983907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,float16,0,0.023589332898457844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,fp8,0,0.021530665457248688
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,float16,0,0.02532800038655599
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,float16,0,0.02779199928045273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,fp8,0,0.02346133440732956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,fp8,0,0.02759466568628947
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,float16,0,0.02739199995994568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.027477333943049114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,float16,0,0.019274666905403137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,float16,0,0.017029333859682083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,fp8,0,0.01523200049996376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,float16,0,0.01714666684468587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,fp8,0,0.015178666760524115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,float16,0,0.01893866683046023
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,fp8,0,0.01729600007335345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,float16,0,0.01720000058412552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,float16,0,0.013269333789745966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,float16,0,0.013173333058754602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,float16,0,0.039642666776975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,float16,0,0.01563199982047081
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,float16,0,0.01309866706530253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,fp8,0,0.03586133321126302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.01341333364446958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,float16,0,0.011114666859308878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,float16,0,0.011034666250149408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,float16,0,0.013114667187134424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,float16,0,0.01128000020980835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,float16,0,0.011002667248249054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,float16,0,0.011173332730929056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,fp8,0,0.011365332951148352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,float16,0,0.010890666395425797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,fp8,0,0.011247999966144562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,float16,0,0.01109333336353302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,float16,0,0.011391999820868174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.011381333072980246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,float16,0,0.011365332951148352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.01850133389234543
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,fp8,0,0.011616000284751257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,float16,0,0.01101333275437355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,fp8,0,0.011296000331640244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,float16,0,0.013232000172138214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,float16,0,0.011264000087976456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,float16,0,0.011152000476916632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.011749333391586939
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,float16,0,0.011616000284751257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,float16,0,0.011061333119869232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,fp8,0,0.011322667201360067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.011050666371981302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,float16,0,0.011370666325092316
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,fp8,0,0.01119999960064888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,float16,0,0.011690666278203329
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.013194666554530462
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,float16,0,0.011178666104873022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.011882666498422623
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.018207999567190807
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,0,0.011637333780527115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,float16,0,0.010928000013033548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,0,0.015317333241303762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,0,0.01912533367673556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,0,0.01097600037852923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,0,0.01913600042462349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,0,0.011488000551859537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,0,0.014901333798964819
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.017456000049908955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,0,0.015301333119471868
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,0,0.019050666441520054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.013349333157142004
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,0,0.009306666751702627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,0,0.010138666878143946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,0,0.010842667271693548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,0,0.011071999867757162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,0,0.011114666859308878
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,0,0.015184000134468079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,0,0.01121066634853681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,0,0.009152000149091085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,0,0.0102186668664217
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,0,0.010682666053374609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,0,0.00966933307548364
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,0,0.017423999806245167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,0,0.011055999745925268
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,0,0.009663999701539675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,0,0.009109333157539368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,0,0.00922133338948091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,0,0.00914666677514712
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,0,0.009557333464423815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,0,0.011034666250149408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,0,0.008922666932145754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,0,0.009653333574533463
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.009296000003814697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,0,0.009173333023985228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,0,0.009152000149091085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,0,0.009354666496316591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.00922133338948091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,0,0.009029333169261614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,0,0.00916800027092298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,0,0.009130666653315226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,0,0.008837333569924036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.013338666409254074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,0,0.009418666362762451
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,0,0.009109333157539368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,0,0.00878399983048439
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,0,0.009125333279371262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,0,0.009029333169261614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,0,0.009173333023985228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,0,0.00919999989370505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.009349333122372627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,0,0.009077333534757296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,0,0.010885333021481832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,0,0.008879999940594038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.009162666896979014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,0,0.00897066667675972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,0,0.00898133342464765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,0,0.008912000184257826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,0,0.008901333436369896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.009098666409651438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,0,0.009301333377758661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,fp8,0,1.6125814119974773
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,float16,0,1.894576072692871
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,float16,0,1.3964799245198567
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,fp8,0,1.7917173703511555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,float16,0,2.0209226608276367
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,0,0.00926399976015091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,float16,0,1.0150720278422039
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,1.280138651529948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,fp8,0,0.874783992767334
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,float16,0,1.0852906703948975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,fp8,0,1.0448853174845378
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.009248000259200731
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,float16,0,1.3823787371317546
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,float16,0,0.7594613234202067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,float16,0,0.5565439860026041
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,fp8,0,0.5152586698532104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,0.7297813097635905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,1.4380213419596355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,float16,0,0.597050666809082
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,fp8,0,0.5448533296585083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,float16,0,0.3417653242746989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,0.4049546718597412
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,0.7224799791971842
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,float16,0,0.746992031733195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,fp8,0,0.31641600529352826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,float16,0,0.35313065846761066
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,fp8,0,0.33765331904093426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,0,0.009232000137368837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,0.4031999905904134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,float16,0,0.4331039985020955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,float16,0,1.140229304631551
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,fp8,0,0.9832639694213867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,fp8,0,1.1121546427408855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,0.8602346579233805
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,float16,0,0.9160266717274984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,float16,0,0.6094719966252645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,fp8,0,0.5439253250757853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,float16,0,0.6645973523457845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,fp8,0,0.6124266783396403
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,float16,0,0.4907146692276001
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,0.8520053227742513
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,float16,0,0.34062933921813965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,0.47171199321746826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,fp8,0,0.3192053238550822
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,float16,0,0.37059199810028076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,fp8,0,0.3388479948043823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,float16,0,0.4949546655019124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,float16,0,0.2737119992574056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,0.48074666659037274
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,0.27348800500233966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,float16,0,0.21910399198532104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,fp8,0,0.20622400442759195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,float16,0,0.23206400871276855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,fp8,0,0.2213546633720398
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,float16,0,0.46245865027109784
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,float16,0,0.26977066198984784
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,0.27300800879796344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,float16,0,0.8154773712158203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,fp8,0,0.7262720266977946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,float16,0,0.9022453625996908
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,fp8,0,0.8312319914499918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,float16,0,0.6894293626149496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,float16,0,0.445525328318278
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,0.6490186850229899
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,float16,0,1.2311253547668457
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,float16,0,0.4899306694666545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,fp8,0,0.4597173531850179
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,float16,0,0.6889066696166992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,float16,0,0.26190932591756183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,0.3762133518854777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,float16,0,0.9163946310679117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,fp8,0,0.24954134225845337
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,float16,0,0.2809866666793823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,0.3647040128707886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,float16,0,0.395087997118632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,float16,0,0.20257065693537393
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,float16,0,0.16181866327921549
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,fp8,0,0.15362667044003805
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,float16,0,0.1723840037981669
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,fp8,0,0.1665173371632894
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,float16,0,0.20378132661183676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,float16,0,1.0639466444651287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,fp8,0,0.9266506830851237
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,fp8,0,0.3976000150044759
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,float16,0,1.1946933269500732
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,fp8,0,1.1023039817810059
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,0.6705280145009359
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,float16,0,0.3935199975967407
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,float16,0,0.5569493373235067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,0.9259680112202963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,fp8,0,0.27056533098220825
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,fp8,0,0.5035946766535441
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,float16,0,0.6334880193074545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,fp8,0,0.5972319841384888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,0.20857600371042886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,float16,0,0.9739999771118164
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,0.48013333479563397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,0.9171520074208578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,fp8,0,0.2818880081176758
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,0.20879467328389487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,fp8,0,0.33048532406489056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,float16,0,0.4977653423945109
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,0.272213339805603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,float16,0,0.28330133358637494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,float16,0,0.19076265891393027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,fp8,0,0.18293333053588867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,float16,0,0.2044853369394938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,fp8,0,0.20003734032313028
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,0.2648853262265523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,float16,0,0.15315199891726175
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,float16,0,0.28777599334716797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,float16,0,0.12267200152079265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,0.16107199589411417
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,fp8,0,0.11770133177439372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,float16,0,0.1292586624622345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,fp8,0,0.12796266873677573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,float16,0,0.1534346640110016
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,float16,0,0.9800053437550863
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,0.16059733430544534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,float16,0,0.6454559961954752
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,fp8,0,0.5840853452682495
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,float16,0,0.7400586605072021
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,fp8,0,0.7132533391316732
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,float16,0,0.5050239960352579
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,0.6234399875005087
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,float16,0,0.3479893207550049
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,fp8,0,0.3214400013287862
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,float16,0,0.35020267963409424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,float16,0,0.3975893259048462
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,0.4797866741816203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,fp8,0,0.3896799882253011
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,float16,0,0.6377973159154257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,float16,0,0.19288533926010132
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,0.3466879924138387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,0.631279985109965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,fp8,0,0.18777066469192505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,float16,0,0.22431999444961548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,fp8,0,0.21236266692479452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,float16,0,0.34674668312072754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,0.3452320098876953
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,0.18935465812683105
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,float16,0,0.1269546647866567
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,fp8,0,0.12326400478680928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,float16,0,0.13795733451843262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,fp8,0,0.13702932993570963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,0.18921599785486856
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,float16,0,0.11776533722877502
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,float16,0,0.18329066038131714
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.1237386663754781
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,float16,0,0.0953439970811208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,float16,0,0.09927999973297119
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,fp8,0,0.09904533624649048
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,float16,0,0.11718400319417317
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,float16,0,0.3061013420422872
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,float16,0,0.6575253407160441
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,fp8,0,0.5796693166097006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,float16,0,0.6368693510691324
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,float16,0,0.34748268127441406
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,fp8,0,0.7617653210957845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,float16,0,0.33646400769551593
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,fp8,0,0.3142399986584981
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,float16,0,0.1755680044492086
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,0.7280373573303223
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,float16,0,0.4092906713485718
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,fp8,0,0.40213334560394287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,float16,0,0.7486240069071451
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,float16,0,0.3800640106201172
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,0.4052160183588664
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,fp8,0,0.09086400270462036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,float16,0,0.18631466229756674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,float16,0,0.2264853318532308
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,fp8,0,0.22364266713460287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,float16,0,0.3757226864496867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.12396267056465149
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,0.404090682665507
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,float16,0,0.21787200371424356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,float16,0,0.11581866939862569
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.1954186757405599
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,fp8,0,0.11217066645622253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,float16,0,0.12592533230781555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,fp8,0,0.13242133458455405
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,float16,0,0.21118932962417603
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,float16,0,0.10322133700052898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,float16,0,0.7668853600819906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.114138662815094
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,float16,0,0.0720960001150767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,fp8,0,0.07029866675535838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,float16,0,0.07881600161393483
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,fp8,0,0.08075733482837677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,float16,0,0.10345066587130229
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.11372266213099162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,float16,0,0.08090666433175404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.08501866459846497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,float16,0,0.06618666648864746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,fp8,0,0.06433600187301636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,float16,0,0.06920533378918965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,fp8,0,0.06863999863465627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,float16,0,0.08132266501585643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.08699199557304382
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,0.7254026730855306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,fp8,0,0.3792320092519124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,float16,0,0.4008106787999471
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,float16,0,0.504752000172933
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,fp8,0,0.510042667388916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,fp8,0,0.17357865969340006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,float16,0,0.5245653390884399
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,float16,0,0.22071999311447144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,fp8,0,0.2103253404299418
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,0.518447995185852
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,fp8,0,0.2757226626078288
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,float16,0,0.525871992111206
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,0.5122453371683756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,float16,0,0.12014399965604146
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,fp8,0,0.11994133392969768
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,float16,0,0.15121066570281982
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,fp8,0,0.14170666535695395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,float16,0,0.2697439988454183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,0.273637334505717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,float16,0,0.12897066275278726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.14509866635004678
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,float16,0,0.07899199922879536
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,fp8,0,0.07871999839941661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,float16,0,0.7646400133768717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,float16,0,0.09014399846394856
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,fp8,0,0.0937653382619222
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,float16,0,0.13267200191815695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,float16,0,0.08029866715272267
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.14469866951306662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.08913066983222961
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,float16,0,0.05638400216897329
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,fp8,0,0.054885332783063255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,float16,0,0.06235733131567637
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,fp8,0,0.06411733229955037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,float16,0,0.07961600025494893
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.08922132849693298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,float16,0,0.06417066852251689
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,float16,0,0.05215999980767568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,fp8,0,0.051925331354141235
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,float16,0,0.2771040002504985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,float16,0,0.05564799904823303
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,float16,0,0.06432533264160156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.06711466610431671
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,float16,0,0.2696213324864705
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.20173333088556925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,float16,0,0.41338666280110675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,fp8,0,0.4045120080312093
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,float16,0,0.5538026491800944
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,fp8,0,0.5824480056762695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,float16,0,0.6532853444417318
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,0.6148480176925659
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,float16,0,0.2283680041631063
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,float16,0,0.29153066873550415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,fp8,0,0.21840532620747885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,fp8,0,0.3105120062828064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,float16,0,0.6382453441619873
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,float16,0,0.3250933289527893
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,float16,0,0.12391466895739238
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,0.33936532338460285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,fp8,0,0.05586666862169901
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,0.7398080031077067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,float16,0,0.1625333329041799
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,0.2808213432629903
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,fp8,0,0.1625653306643168
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,float16,0,0.3243146737416585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,0.3246026635169983
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,float16,0,0.17059733470280966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.1695573329925537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,float16,0,0.07671999931335449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,fp8,0,0.07446933289368947
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,float16,0,0.08880533774693807
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,fp8,0,0.09315199653307597
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,float16,0,0.16740800937016806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.1558133363723755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,float16,0,0.0768746683994929
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.09106133381525676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,fp8,0,0.04711466530958811
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,float16,0,0.05418666700522105
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,fp8,0,0.05797866483529409
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.0912000040213267
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,float16,0,0.056101332108179726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,float16,0,0.04081066697835922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,fp8,0,0.040336000422636666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,float16,0,0.044981335600217186
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,fp8,0,0.04587733248869578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,float16,0,0.056373332937558494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.0621066689491272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,float16,0,0.046053335070610046
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.04784533381462097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,float16,0,0.03790933390458425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,fp8,0,0.037834666669368744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,float16,0,0.039818666875362396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,fp8,0,0.039690665900707245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.06660800178845723
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,float16,0,0.04609066744645437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.047882666190465294
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,fp8,0,0.11487467090288798
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,float16,0,0.2810080051422119
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,fp8,0,0.27713600794474286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,float16,0,0.4620213508605957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,float16,0,0.3829226493835449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,fp8,0,0.4047360022862752
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,0.46245865027109784
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,fp8,0,0.15227733055750528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,float16,0,0.04586133360862732
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,float16,0,0.2102186679840088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,fp8,0,0.21997867027918497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,float16,0,0.458682656288147
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,float16,0,0.076773335536321
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,float16,0,0.23337066173553467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.06252266466617584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,0.4596800009409587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,fp8,0,0.08488000432650249
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,float16,0,0.08338133494059245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,float16,0,0.11473600069681804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.24753065903981528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,fp8,0,0.10717333356539409
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,float16,0,0.2325813372929891
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.24590933322906494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,float16,0,0.0552106648683548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,float16,0,0.10458667079607646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,fp8,0,0.05469333132108053
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,float16,0,0.06625600159168243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,fp8,0,0.07070399820804596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,float16,0,0.10393066207567851
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.12142399946848552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,float16,0,0.06010666489601135
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,float16,0,0.03760000069936117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,fp8,0,0.0378560001651446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.07187733550866444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,float16,0,0.04387199878692627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,fp8,0,0.04578666885693868
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,float16,0,0.06035199761390686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.07063999772071838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,float16,0,0.04562666515509287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,float16,0,0.03372266640265783
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,fp8,0,0.033520000676314034
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,float16,0,0.0364533339937528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,fp8,0,0.037802666425704956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,float16,0,0.04580800235271454
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,float16,0,0.03779733429352442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.03993066648642222
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,float16,0,0.03176533430814743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,fp8,0,0.031632001201311745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,float16,0,0.03389333436886469
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,fp8,0,0.03346133232116699
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,float16,0,0.15519466996192932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,float16,0,0.03779733429352442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.0401653324564298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.12159466743469238
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,fp8,0,0.31646933158238727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,float16,0,0.4533226490020752
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,fp8,0,0.49191466967264813
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.05019199848175049
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,0.5894879897435507
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,float16,0,0.17338667313257852
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,float16,0,0.5815999905268351
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.05000533163547516
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,float16,0,0.5839680035909017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,float16,0,0.24915200471878052
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,fp8,0,0.2598666747411092
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,float16,0,0.2815733353296916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.28729067246119183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,0.6593439976374308
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,float16,0,0.09307199716567993
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,fp8,0,0.09131733576456706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,fp8,0,0.1376053293546041
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,float16,0,0.15527466932932535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,float16,0,0.29185599088668823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.1536799967288971
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,float16,0,0.05550399919350942
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.28255999088287354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,fp8,0,0.05592533449331919
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,float16,0,0.3142293294270833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,fp8,0,0.07640533149242401
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,float16,0,0.0662720004717509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,float16,0,0.15428266922632852
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.1437440017859141
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,float16,0,0.06457066535949707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,float16,0,0.03377600014209747
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,fp8,0,0.03551466763019562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,float16,0,0.04176533222198486
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,fp8,0,0.0458186666170756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.07855466504891713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,float16,0,0.04373333354791006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.0788213312625885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.050714666644732155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,float16,0,0.027647999425729115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,fp8,0,0.029370665550231934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,float16,0,0.032170665760835014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,fp8,0,0.03439466655254364
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.050106664498647056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,float16,0,0.03350933392842611
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.036271999279658
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,float16,0,0.02566933383544286
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,float16,0,0.04374399781227112
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,fp8,0,0.025642665723959606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,float16,0,0.02734400083621343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,fp8,0,0.02737066646416982
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.03629333277543386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,float16,0,0.033728001018365227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,float16,0,0.02934933453798294
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.02937600016593933
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,fp8,0,0.16926399866739908
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,float16,0,0.025370667378107708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,fp8,0,0.02367466688156128
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,float16,0,0.026309333741664886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,fp8,0,0.025594666600227356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,float16,0,0.029311999678611755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,float16,0,0.13563199838002524
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.030159999926884968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,float16,0,0.06443733473618825
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,float16,0,0.1467626690864563
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,float16,0,0.27617599566777545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.5258880058924357
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,fp8,0,0.2741600076357524
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,float16,0,0.5535039901733398
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,fp8,0,0.14628799756368002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,fp8,0,0.4455626805623372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,float16,0,0.4169333378473918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,float16,0,0.22183465957641602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.2671999931335449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,float16,0,0.26418666044871014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,float16,0,0.5195733308792114
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.5423839886983236
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,float16,0,0.12185066938400269
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,float16,0,0.2651306589444478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,fp8,0,0.11981333295504253
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,float16,0,0.07989866534868877
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.12517333030700684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,fp8,0,0.047151997685432434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,float16,0,0.044997334480285645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,float16,0,0.14069333672523499
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,float16,0,0.056976000467936196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,fp8,0,0.06622399886449178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.13491732875506082
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,float16,0,0.14459199706713358
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,float16,0,0.05821866790453593
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.0726560006539027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,float16,0,0.027562665442625683
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,fp8,0,0.029301332930723827
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,float16,0,0.03576533248027166
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,fp8,0,0.039477333426475525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,float16,0,0.05815466741720835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.07261866827805837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,float16,0,0.037808001041412354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.045706664522488914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,float16,0,0.0233599990606308
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,fp8,0,0.02316266546646754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,float16,0,0.02571200082699458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,fp8,0,0.027888000011444092
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,fp8,0,0.23014932870864868
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.04576533536116282
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,float16,0,0.02754666656255722
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.030586667358875275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,float16,0,0.01940800001223882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,float16,0,0.02141333371400833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,fp8,0,0.022848000129063923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,fp8,0,0.019253333409627277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,float16,0,0.027301333844661713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.030234667162100475
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,float16,0,0.023221333821614582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.023589332898457844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,float16,0,0.019130667050679524
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,fp8,0,0.018016000588734944
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,float16,0,0.019445333629846573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,fp8,0,0.01926933353145917
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,float16,0,0.023306667804718018
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.023455999791622162
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,float16,0,0.019765333582957584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.27753599484761554
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.019509332875410717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,float16,0,0.019215999792019527
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,fp8,0,0.01741333305835724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,float16,0,0.01911466692884763
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.01987733319401741
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,float16,0,0.019834666202465694
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,fp8,0,0.01762666677435239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,float16,0,0.2545599937438965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,fp8,0,0.12761066357294717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,float16,0,0.12504000465075174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.2683626612027486
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,float16,0,0.19765333334604898
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,fp8,0,0.2219840089480082
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,float16,0,0.0693333347638448
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,fp8,0,0.06628266473611195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,float16,0,0.11150399843851726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,fp8,0,0.07239466905593872
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,float16,0,0.25602134068806964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.26984532674153644
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.12693867087364197
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,float16,0,0.1348426640033722
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,float16,0,0.03771200031042099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,fp8,0,0.03990933299064636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,float16,0,0.049813335140546165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,float16,0,0.13145599762598673
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.12481600046157837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,float16,0,0.054618666569391884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.07055999835332234
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,float16,0,0.023525332411130268
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,fp8,0,0.0378560001651446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,float16,0,0.03141333411137263
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,float16,0,0.054192001620928444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.07020799815654755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.041946664452552795
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,float16,0,0.019199999670187633
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,fp8,0,0.021274665991465252
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,float16,0,0.02319466571013133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,fp8,0,0.025589334468046825
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,float16,0,0.035642666121323906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.04192000130812327
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,float16,0,0.02386133372783661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.02794133375088374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,float16,0,0.01708799973130226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,fp8,0,0.017317333569129307
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,float16,0,0.019109333554903667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,fp8,0,0.01940800001223882
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,fp8,0,0.11137066284815471
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,float16,0,0.02436800052722295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.027647999425729115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,float16,0,0.01941866676012675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.02145066608985265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,float16,0,0.017024000485738117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.021146667500336964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,float16,0,0.017152000218629837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,fp8,0,0.06109866499900818
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,float16,0,0.015008000036080679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,fp8,0,0.014954666296641031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,fp8,0,0.02584533393383026
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,float16,0,0.015392000476519266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,fp8,0,0.015354666858911514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,float16,0,0.03550933301448822
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,float16,0,0.01708799973130226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.017344000438849132
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,float16,0,0.03777066618204117
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,float16,0,0.015365333606799444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,float16,0,0.015114666273196539
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,fp8,0,0.01522133375207583
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,float16,0,0.015216000378131866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,fp8,0,0.015285332997639975
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.015322666615247726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,float16,0,0.015397333850463232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,float16,0,0.01586666703224182
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,float16,0,0.07321600119272868
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,float16,0,0.01926933353145917
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,fp8,0,0.06990399956703186
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,float16,0,0.11437867085138957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.12598400314648947
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,float16,0,0.13985066612561545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,float16,0,0.04196799794832865
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,fp8,0,0.04223999877770742
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,fp8,0,0.11761066317558289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,float16,0,0.05201066533724467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,fp8,0,0.0637546678384145
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,float16,0,0.056234667698542275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,float16,0,0.025429333249727886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,float16,0,0.13986666997273764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.1269760032494863
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,fp8,0,0.02734400083621343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,float16,0,0.033376000821590424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,fp8,0,0.037802666425704956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,float16,0,0.05602666735649109
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,float16,0,0.0335413341720899
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.0705813318490982
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,float16,0,0.01720533271630605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,fp8,0,0.017418666432301205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,float16,0,0.021146667500336964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,fp8,0,0.023418667415777843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,float16,0,0.033573334415753685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.025274666647116344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,float16,0,0.015008000036080679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,float16,0,0.021717332303524017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,fp8,0,0.01515199989080429
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,float16,0,0.015471999843915304
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.02571200082699458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,float16,0,0.02143999934196472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,float16,0,0.017082666357358296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.019141333798567455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,float16,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,fp8,0,0.01525866612792015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,float16,0,0.01708799973130226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,float16,0,0.014938666174809137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,float16,0,0.013082666943470636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,float16,0,0.013274667163689932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,float16,0,0.012917333592971167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,float16,0,0.022410665949185688
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.07196799914042155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,float16,0,0.013061333447694778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,float16,0,0.013066666821638743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,fp8,0,0.013354666531085968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.03994133323431015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,float16,0,0.012965332716703415
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.03986666599909464
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,float16,0,0.05213866631189982
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,float16,0,0.06238933404286703
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.01915733392039935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,float16,0,0.06582933167616527
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,fp8,0,0.07124266525109609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,fp8,0,0.03175999969244003
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.07668800155321757
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,float16,0,0.03888533264398575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,fp8,0,0.043653334180514015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,float16,0,0.062309334675470986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,float16,0,0.02123733361562093
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.04364799956480662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,float16,0,0.03570133447647095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.07654933134714763
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,float16,0,0.02533866713444392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,fp8,0,0.026704000929991405
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,float16,0,0.03570666660865148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.04381866753101349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,float16,0,0.021157334248224895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.025216000775496166
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,float16,0,0.015226667126019796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,float16,0,0.015306666493415833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,float16,0,0.02147199958562851
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.01915733392039935
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,float16,0,0.017029333859682083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,fp8,0,0.05207466582457224
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,float16,0,0.013253333667914072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,float16,0,0.013232000172138214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,float16,0,0.01699200024207433
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.019167999426523846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,float16,0,0.012821332861979803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,float16,0,0.031856000423431396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,float16,0,0.013183999806642532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,fp8,0,0.011215999722480774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,float16,0,0.013114667187134424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,float16,0,0.012154666086037954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,float16,0,0.011546666423479715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,fp8,0,0.021162666380405426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,float16,0,0.01119999960064888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,float16,0,0.012389333297808966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,float16,0,0.013151999562978745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.011328000575304031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,float16,0,0.011274666835864386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,float16,0,0.011296000331640244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,fp8,0,0.011472000430027643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,float16,0,0.013194666554530462
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,float16,0,0.010965333630641302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.011589333415031433
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.0252960001428922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,fp8,0,0.011359999577204386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,float16,0,0.01098666712641716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,fp8,0,0.011120000233252844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,float16,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.014896000425020853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,float16,0,0.04454400142033895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,fp8,0,0.04153066625197729
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,float16,0,0.051594664653142296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,float16,0,0.042650664846102394
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,fp8,0,0.051818668842315674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,float16,0,0.027285332481066387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.048170665899912514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,float16,0,0.01121066634853681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,fp8,0,0.02535466601451238
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,float16,0,0.031557333966096245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,float16,0,0.02531733363866806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,fp8,0,0.03187733391920725
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,float16,0,0.04265599946180979
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.027952000498771667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,float16,0,0.017525333911180496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,fp8,0,0.01735466718673706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,float16,0,0.0194560003777345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.012074666718641916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,fp8,0,0.021029333273569744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,float16,0,0.025301332275072735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.028618666032950085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,float16,0,0.017162666966517765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,float16,0,0.013114667187134424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.01945066700379054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,fp8,0,0.013376000026861826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,float16,0,0.014864000181357065
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,float16,0,0.01714133347074191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.014544000228246054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,float16,0,0.012885333349307379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,float16,0,0.013178666432698568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.015226667126019796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,float16,0,0.012634667257467905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,float16,0,0.011178666104873022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,fp8,0,0.011461333682139715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,float16,0,0.011541333049535751
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,float16,0,0.01121066634853681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,float16,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,float16,0,0.011258666714032492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,fp8,0,0.01181866725285848
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,float16,0,0.011600000162919363
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,float16,0,0.011237333218256632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.012453333785136541
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,float16,0,0.011007999380429586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,float16,0,0.011482667177915573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,fp8,0,0.011178666104873022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,float16,0,0.011231999844312668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,fp8,0,0.011173332730929056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.04892266790072123
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,float16,0,0.01109333336353302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,float16,0,0.011146667102972666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.011391999820868174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,float16,0,0.010847999403874079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,float16,0,0.01109333336353302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,fp8,0,0.011087999989589056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,float16,0,0.01129066695769628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,float16,0,0.013210666676362356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,float16,0,0.04101333270470301
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,fp8,0,0.0355679988861084
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,float16,0,0.0444106658299764
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,fp8,0,0.04181866844495138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,float16,0,0.033488000432650246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.033520000676314034
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,float16,0,0.02554133286078771
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,fp8,0,0.023589332898457844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,fp8,0,0.025616000096003216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,float16,0,0.027322667340437572
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.034074666599432625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,float16,0,0.03383466601371765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,float16,0,0.021274665991465252
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.021338666478792827
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,fp8,0,0.017018667111794155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.019093333433071773
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,fp8,0,0.01736533393462499
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.02141333371400833
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,float16,0,0.015008000036080679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,float16,0,0.013290667285521826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,fp8,0,0.013162666310866674
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,float16,0,0.013248000293970108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,float16,0,0.01505600040157636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,float16,0,0.012960000584522883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.014970666418472925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,float16,0,0.01314666618903478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,float16,0,0.013023999830087027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.011600000162919363
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,float16,0,0.01099733387430509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,float16,0,0.011349332829316458
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,fp8,0,0.01119999960064888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,float16,0,0.011061333119869232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,fp8,0,0.010842667271693548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,float16,0,0.011322667201360067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.01258133351802826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,float16,0,0.011136000355084738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.011221333096424738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,float16,0,0.01128000020980835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,float16,0,0.011237333218256632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,float16,0,0.017055999487638474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,float16,0,0.011061333119869232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,float16,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,float16,0,0.02128000060717265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,float16,0,0.010933333386977514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,float16,0,0.011039999624093374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.01116266722480456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,float16,0,0.010922666639089584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,float16,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,float16,0,0.010879999647537867
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,float16,0,0.01121066634853681
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.011215999722480774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,float16,0,0.013210666676362356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,0,0.011317333827416102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,0,0.011168000598748526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,0,0.015018666783968607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,0,0.013248000293970108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,0,0.015109332899252573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,float16,0,0.01918399954835574
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,0,0.01090666651725769
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.013280000537633896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,0,0.011429333438475927
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,0,0.011306667079528173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,0,0.009088000282645226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,0,0.009141333401203156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,0,0.010960000256697336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.011391999820868174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,0,0.011002667248249054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.010117333382368088
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,0,0.009216000015536943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,0,0.009322666873534521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,0,0.009632000078757605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,0,0.010890666395425797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,0,0.009205333267649015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,0,0.009189333145817121
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,0,0.009093333035707474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,0,0.009173333023985228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,0,0.010847999403874079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,0,0.009205333267649015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,0,0.014906667172908783
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.009349333122372627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,0,0.009162666896979014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,0,0.008965333302815756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,0,0.009248000259200731
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,0,0.009589333087205887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,0,0.009232000137368837
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,0,0.009103999783595404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,0,0.009493333597977957
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,0,0.009125333279371262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,0,0.009088000282645226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,0,0.009237333511312803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,0,0.008869333192706108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,0,0.008879999940594038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,0,0.009290666629870733
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,0,0.008933333059151968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,0,0.00879466657837232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,0,0.009125333279371262
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,0,0.009082666908701261
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,0,0.008933333059151968
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,0,0.009162666896979014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,0,0.00922133338948091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,0,0.009242666885256767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,0,0.008922666932145754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.009349333122372627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,float16,0,0.6619893312454224
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,float16,0,0.5871146519978842
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,float16,0,1.0672693252563477
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,fp8,0,0.5249173243840536
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,float16,0,0.6524906555811564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,float16,0,0.3954186836878459
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,0.3624639908472697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,float16,0,0.35140268007914227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,fp8,0,0.33048532406489056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,float16,0,0.39479466279347736
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,0.36103999614715576
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,float16,0,0.23990400632222494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.00919999989370505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,0.2328640023867289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,float16,0,0.22669865687688193
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,fp8,0,0.21397866805394491
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,float16,0,0.2408906618754069
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,0.2328266700108846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,float16,0,0.4248160123825073
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,float16,0,0.6501546700795492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,0,0.009509333098928133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,0.4097866614659627
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,fp8,0,0.33429865042368573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,float16,0,0.42897601922353107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,0.40974398454030353
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,0.6254666646321615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,fp8,0,0.947493314743042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,float16,0,0.2451253334681193
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,0.24471465746561685
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,float16,0,0.2269973357518514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,fp8,0,0.21775466203689575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,float16,0,0.2537546753883362
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,0.24313066403071085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,float16,0,0.18402665853500366
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,0.631061315536499
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,float16,0,0.17174933354059854
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,fp8,0,0.16434133052825928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,float16,0,0.18434667587280273
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,float16,0,0.4859306812286377
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,float16,0,0.33369600772857666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,fp8,0,0.45081067085266113
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,0.31885333855946857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,float16,0,0.2809600035349528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,fp8,0,0.26602667570114136
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,float16,0,0.3310133417447408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,float16,0,0.18758400281270346
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,0.3118186593055725
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,0.1886720061302185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,float16,0,0.1694773236910502
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,fp8,0,0.16267733772595724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,float16,0,0.18878400325775146
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,0.18702399730682373
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,float16,0,0.15390933553377786
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.15219199657440186
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,fp8,0,0.599237322807312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,float16,0,0.14480533202489218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,fp8,0,0.13983466227849325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,float16,0,0.15436800320943198
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.15250666936238608
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,float16,0,0.36823467413584393
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,float16,0,0.6222666501998901
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,fp8,0,0.5778133471806844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,float16,0,0.41146667798360187
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,0.417738676071167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,float16,0,0.34185067812601727
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,fp8,0,0.32174932956695557
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,float16,0,0.24225066105524698
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,0.1805386741956075
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,0.2311840057373047
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,float16,0,0.420576016108195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,float16,0,0.19907732804616293
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,0.1816106637318929
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,float16,0,0.246288001537323
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,float16,0,0.14455999930699667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,0.22636799017588297
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.14462400476137796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,float16,0,0.12863467137018839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,fp8,0,0.12502400080362955
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.1455733378728231
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,float16,0,0.14377066493034363
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,float16,0,0.126202662785848
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.12558933099110922
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,float16,0,0.1179146667321523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,fp8,0,0.1146453320980072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.1257866621017456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,float16,0,0.3944586515426636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,0.2795253396034241
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,float16,0,0.22124266624450684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,fp8,0,0.21172267198562622
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,float16,0,0.27481067180633545
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,0.2826506694157918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,float16,0,0.16028799613316855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,0.1601759990056356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,float16,0,0.1355893313884735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,fp8,0,0.13424000144004822
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,float16,0,0.15254933635393778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,0.15913599729537964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,float16,0,0.11055999994277954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.11241599917411804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,float16,0,0.09922132889429729
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,fp8,0,0.09731733798980713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,float16,0,0.10998933513959248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.11191999912261963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,float16,0,0.09714667002360027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,float16,0,0.09102400143941243
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,0.4121760129928589
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,fp8,0,0.08876799543698628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,float16,0,0.09703466296195984
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.09699733058611552
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,fp8,0,0.19968533515930176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,0.30702932675679523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,float16,0,0.2884959975878398
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,float16,0,0.39610668023427326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,fp8,0,0.39044801394144696
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,float16,0,0.1262986660003662
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,float16,0,0.218176007270813
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,fp8,0,0.21268266439437866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,float16,0,0.27832533915837604
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,float16,0,0.16828266779581705
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,float16,0,0.2908906737963359
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,0.30663466453552246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,float16,0,0.12621866663297018
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,fp8,0,0.1276533305644989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.15973866979281107
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,float16,0,0.17204799254735312
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.15787200133005777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,float16,0,0.09286933143933614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,float16,0,0.07896000146865845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.09869866569836934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.09876799583435059
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,float16,0,0.07654933134714763
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,float16,0,0.06868266562620799
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,fp8,0,0.06834666430950165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,float16,0,0.07660800218582153
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.0790773332118988
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,float16,0,0.06842666864395142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,float16,0,0.06426133215427399
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,fp8,0,0.06291733185450236
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,float16,0,0.06825600067774455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.09724266330401103
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,float16,0,0.26265599330266315
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,fp8,0,0.26314665873845416
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,float16,0,0.2058239976565043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,fp8,0,0.37751468022664386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,0.214303990205129
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,float16,0,0.1476533313592275
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,fp8,0,0.13750933607419333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,fp8,0,0.07878933350245158
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,float16,0,0.09282666444778442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,float16,0,0.20438400904337564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,0.2151040037473043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.08016533156236012
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,float16,0,0.10507200161616008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.11640000343322754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,float16,0,0.08990933497746785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,fp8,0,0.09038399656613667
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,float16,0,0.11217600107192993
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.1125866671403249
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,float16,0,0.07303999861081441
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.06836266815662384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,float16,0,0.06206400195757548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,fp8,0,0.06274666885534923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.07807466884454091
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,float16,0,0.06015466650327047
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.06214400132497152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,float16,0,0.05554133156935374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.06861866513888042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,fp8,0,0.055349335074424744
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,float16,0,0.060229331254959106
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.06235733131567637
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.05500266452630361
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,float16,0,0.051813334226608276
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,fp8,0,0.049925332268079124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,float16,0,0.05390933156013489
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,float16,0,0.2903839945793152
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,float16,0,0.22732800245285034
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,fp8,0,0.28860799471537274
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,float16,0,0.160671999057134
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,0.2444053292274475
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,fp8,0,0.15337066849072775
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,0.24388800064722696
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,float16,0,0.23114667336146036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,float16,0,0.12356799840927124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.11906133095423381
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,float16,0,0.12566399574279785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,float16,0,0.08658132950464885
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,fp8,0,0.0904906690120697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.07667733232180278
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,float16,0,0.072543998559316
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.11845333377520244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.07443200051784515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,fp8,0,0.05410666763782501
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,float16,0,0.052890668312708534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,float16,0,0.06836799780527751
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.07508799930413564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.05594133337338766
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,float16,0,0.05415999889373779
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,fp8,0,0.04586666822433472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,float16,0,0.05187733471393585
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.05606399973233541
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,float16,0,0.043968002001444496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.054154664278030396
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,float16,0,0.03995199998219808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,fp8,0,0.03967999915281931
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,float16,0,0.043978666265805565
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.045909335215886436
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,float16,0,0.039887999494870506
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.039808000127474465
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,float16,0,0.03794133414824804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,float16,0,0.03963200002908707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.03977066775163015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,float16,0,0.20534400145212808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,float16,0,0.1646773318449656
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,fp8,0,0.20676799615224203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.17838400602340698
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,float16,0,0.10942932963371277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,fp8,0,0.10381866494814555
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,float16,0,0.08075733482837677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.1771893302599589
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.08833066622416179
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,float16,0,0.06819733480612437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,float16,0,0.06418666740258534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,float16,0,0.051914667089780174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,float16,0,0.043578664461771645
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,fp8,0,0.0666293352842331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,float16,0,0.08433066805203755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.04587199787298838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.09199999769528706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,float16,0,0.054117331902186074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,float16,0,0.042378668983777366
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,fp8,0,0.0454720010360082
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.045882667104403176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,float16,0,0.05395199855168661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,fp8,0,0.03771200031042099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,float16,0,0.03584533433119456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,fp8,0,0.03764266769091288
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,float16,0,0.04178133110205332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,float16,0,0.035760000348091125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.03755199909210205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,float16,0,0.03366933266321818
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,fp8,0,0.0336053321758906
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,float16,0,0.03550933301448822
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.037658666570981346
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,float16,0,0.03365333378314972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.03366933266321818
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,float16,0,0.031717332700888314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,fp8,0,0.031471999982992806
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,float16,0,0.033674667278925575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.03363200028737386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,float16,0,0.16476800044377646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,float16,0,0.23566399017969766
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,float16,0,0.19382933775583902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,fp8,0,0.2427519957224528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.059861332178115845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,float16,0,0.12853866815567017
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,float16,0,0.041663999358812966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,fp8,0,0.1267253359158834
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.045567999283472695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,float16,0,0.19800533850987753
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,float16,0,0.11033599575360616
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.09897067149480183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.21553067366282144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,float16,0,0.06420266628265381
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,fp8,0,0.07141866783301036
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,float16,0,0.10250133275985718
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.09902933239936829
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,float16,0,0.054901331663131714
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.06311999758084615
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,float16,0,0.039701332648595176
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,fp8,0,0.043391997615496315
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,float16,0,0.054570664962132774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.0629066675901413
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,float16,0,0.039733332892258964
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,fp8,0,0.03372266640265783
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,float16,0,0.03967999915281931
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.044079999128977455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,float16,0,0.03179199993610382
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.03349333256483078
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,float16,0,0.02769600103298823
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,fp8,0,0.028090665737787884
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,float16,0,0.031914666295051575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.03363733241955439
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,float16,0,0.02735999971628189
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.02775999903678894
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,float16,0,0.025941332181294758
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,fp8,0,0.025594666600227356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,float16,0,0.02739199995994568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.0276053324341774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,float16,0,0.025466665625572205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.025568000972270966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,float16,0,0.025279998779296875
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,fp8,0,0.023376000424226124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,float16,0,0.025498665869235992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.025392000873883564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.06044800082842509
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.2012373407681783
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,float16,0,0.1783626675605774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,float16,0,0.11176533500353496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,float16,0,0.20440000295639038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,fp8,0,0.2187946637471517
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.04404800136884054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,fp8,0,0.11210667093594869
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,float16,0,0.17940799395243326
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.20045334100723267
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,float16,0,0.05597866574923197
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,float16,0,0.09648000200589497
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.08969066540400188
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,fp8,0,0.062133332093556724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,float16,0,0.09620799620946248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,float16,0,0.04818133513132731
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.05702400207519531
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,float16,0,0.033717334270477295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,fp8,0,0.03755199909210205
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.05730666716893514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,float16,0,0.047983999053637184
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.039333333571751915
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,fp8,0,0.027621333797772724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,float16,0,0.03388266762097677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.21559999386469522
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.038618666430314384
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,float16,0,0.02526933451493581
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.028351999819278717
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,fp8,0,0.023221333821614582
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,float16,0,0.021386665602525074
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,float16,0,0.02533866713444392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.02758399893840154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,float16,0,0.021295999487241108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.02313599983851115
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,float16,0,0.019498666127522785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,fp8,0,0.02093333254257838
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,float16,0,0.02164799968401591
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.02314666658639908
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,float16,0,0.019845332950353622
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.02142400046189626
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,float16,0,0.019274666905403137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,fp8,0,0.01930133377512296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,float16,0,0.019744000087181728
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.021141332884629566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,float16,0,0.019215999792019527
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.017445333302021027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,float16,0,0.019146667172511418
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,fp8,0,0.019093333433071773
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.0174346665541331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,float16,0,0.01913600042462349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.09640000263849895
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,float16,0,0.08746133248011272
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,float16,0,0.03347733368476232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,float16,0,0.10373333096504211
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,fp8,0,0.10337066650390625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.08898133039474487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,float16,0,0.04776533444722494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,fp8,0,0.05599466462930044
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,float16,0,0.09020266930262248
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,float16,0,0.044906665881474815
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.05400000015894572
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,float16,0,0.031386665999889374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,fp8,0,0.03419733295838038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,float16,0,0.04543466866016388
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.05384000142415365
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,float16,0,0.03146133323510488
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.03554133325815201
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,float16,0,0.031557333966096245
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,float16,0,0.023413332800070446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,fp8,0,0.025418666501839954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,float16,0,0.023413332800070446
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.025653332471847534
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,float16,0,0.019194666296243668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,fp8,0,0.01951466624935468
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,float16,0,0.023232000569502514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.025413334369659424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,float16,0,0.01923199991385142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.01941866676012675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,float16,0,0.017184000462293625
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,fp8,0,0.017509333789348602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,float16,0,0.0194560003777345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,float16,0,0.025301332275072735
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.019226666539907455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,float16,0,0.017258666455745697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,float16,0,0.015087999403476715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,fp8,0,0.015189333508412043
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,float16,0,0.017210666090250015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.01735466718673706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,float16,0,0.015498666713635126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,fp8,0,0.015381333728631338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,float16,0,0.015279999623696009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.015103999525308609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,float16,0,0.015200000256299973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.015168000012636185
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,float16,0,0.015344000111023584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,fp8,0,0.014954666296641031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,float16,0,0.01498666654030482
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.08925333619117737
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,float16,0,0.04651199777921041
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,float16,0,0.03178133318821589
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,float16,0,0.03136533250411352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,float16,0,0.05206400156021118
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,fp8,0,0.0580320010582606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,fp8,0,0.03566399961709976
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,float16,0,0.046096002062161766
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,float16,0,0.027845333019892376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.03342399994532267
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.05425066749254862
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,float16,0,0.021210665504137676
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,fp8,0,0.023205332458019257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,float16,0,0.01945066700379054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,float16,0,0.027450665831565857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.023391999304294586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,float16,0,0.01525866612792015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,float16,0,0.01526933287580808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,fp8,0,0.017103999853134155
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,float16,0,0.02107200026512146
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,float16,0,0.01522133375207583
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,float16,0,0.01321600005030632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,fp8,0,0.015109332899252573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,float16,0,0.015279999623696009
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,float16,0,0.01370666672786077
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.015301333119471868
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,float16,0,0.013210666676362356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,fp8,0,0.01333333303531011
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,float16,0,0.01301866645614306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.015354666858911514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,float16,0,0.01322666679819425
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,float16,0,0.012863999853531519
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,float16,0,0.013093333691358566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.013301332791646322
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,float16,0,0.012960000584522883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.05531733234723409
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,float16,0,0.013178666432698568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.035690667728583016
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,float16,0,0.011087999989589056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,float16,0,0.013061333447694778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,float16,0,0.031498665610949196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.03707200040419897
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.0334346666932106
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,float16,0,0.03833066672086716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,fp8,0,0.041722665230433144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.023232000569502514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,float16,0,0.0236160010099411
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,fp8,0,0.025568000972270966
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,float16,0,0.03145066648721695
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,float16,0,0.020928000410397846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.023344000180562336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,float16,0,0.01724799970785777
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,fp8,0,0.017162666966517765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,float16,0,0.0200853335360686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.02317333221435547
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,float16,0,0.015157333264748255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.017258666455745697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,float16,0,0.015119999647140503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.013183999806642532
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.01714133347074191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,float16,0,0.013104000439246496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.013242666920026144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,float16,0,0.012981332838535309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,float16,0,0.01341333364446958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,float16,0,0.011120000233252844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,fp8,0,0.01116266722480456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,float16,0,0.013066666821638743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,float16,0,0.012015999605258306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,float16,0,0.011178666104873022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,float16,0,0.011237333218256632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,float16,0,0.010885333021481832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,float16,0,0.011066666493813196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,fp8,0,0.011231999844312668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,float16,0,0.01109333336353302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,float16,0,0.011157333850860596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,float16,0,0.011007999380429586
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,fp8,0,0.011274666835864386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.03608000030120214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,float16,0,0.023376000424226124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,float16,0,0.014511999984582266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,fp8,0,0.03150933235883713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.025562666356563568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,float16,0,0.01945066700379054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,float16,0,0.011253333340088526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,fp8,0,0.019386666516462963
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,float16,0,0.02333866556485494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.012223999947309494
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.025487999121348064
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,float16,0,0.015552000453074774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,float16,0,0.01303999995191892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,float16,0,0.015322666615247726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,float16,0,0.011338666081428528
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,float16,0,0.01357866699496905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,float16,0,0.012357333054145178
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,fp8,0,0.01157333329319954
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,float16,0,0.011546666423479715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,float16,0,0.011381333072980246
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,float16,0,0.01184533288081487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,fp8,0,0.011621333658695221
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,float16,0,0.011445333560307821
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.011440000186363855
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,float16,0,0.01201066623131434
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,fp8,0,0.011322667201360067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,float16,0,0.0316746657093366
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,float16,0,0.011402666568756104
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,float16,0,0.010954666882753372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,float16,0,0.011034666250149408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,float16,0,0.011215999722480774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.011215999722480774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,float16,0,0.01098666712641716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,float16,0,0.011018666128317514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,fp8,0,0.011215999722480774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,float16,0,0.010960000256697336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,float16,0,0.015184000134468079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.011231999844312668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.017514667163292568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.015226667126019796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,float16,0,0.01951466624935468
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,float16,0,0.02743999908367793
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,fp8,0,0.02555199960867564
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.019440000255902607
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,float16,0,0.011642667154471079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,float16,0,0.01930133377512296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,fp8,0,0.017386666188637417
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,float16,0,0.015103999525308609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,float16,0,0.019178666174411774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.019391999890406925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,float16,0,0.01321600005030632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,fp8,0,0.013157332936922709
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,float16,0,0.011296000331640244
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.014896000425020853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,float16,0,0.015114666273196539
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,float16,0,0.01210133358836174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,fp8,0,0.01126933346192042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,float16,0,0.011173332730929056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,float16,0,0.011173332730929056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,float16,0,0.011274666835864386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,float16,0,0.010922666639089584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,float16,0,0.011231999844312668
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,float16,0,0.011146667102972666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,float16,0,0.01119999960064888
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,float16,0,0.010938666760921478
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,fp8,0,0.010954666882753372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,float16,0,0.010847999403874079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,float16,0,0.010960000256697336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,float16,0,0.01091733326514562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,float16,0,0.010922666639089584
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,float16,0,0.011173332730929056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,fp8,0,0.011733333269755045
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,float16,0,0.011173332730929056
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,0,0.011701333026091257
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,0,0.012181332955757776
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,0,0.011328000575304031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,0,0.01091733326514562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,0,0.010890666395425797
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,float16,0,0.011168000598748526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,0,0.011674666156371435
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,0,0.010666667173306147
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,0,0.009103999783595404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,0,0.010885333021481832
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,0,0.010698666175206503
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,0,0.010725333044926325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.009322666873534521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,0,0.009183999771873156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,float16,0,0.010901333143313726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,0,0.009103999783595404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,0,0.00919999989370505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,0,0.008965333302815756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.009141333401203156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,0,0.009328000247478485
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,0,0.009173333023985228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,0,0.009301333377758661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,0,0.00915733352303505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.009392000113924345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,0,0.009258666386206945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,0,0.009072000160813332
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,0,0.009119999905427298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,0,0.009082666908701261
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,0,0.009119999905427298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,0,0.009093333035707474
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,0,0.009205333267649015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,0,0.009130666653315226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,0,0.008912000184257826
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,float16,0,0.38676265875498456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,0.35277867317199707
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,float16,0,0.38654931386311847
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,float16,0,0.23685866594314575
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,0.22978132963180542
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,float16,0,0.2382240096728007
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,0.22809600830078125
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,float16,0,0.22224533557891846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,0,0.009136000027259191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.21225599447886148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,float16,0,0.22018667062123617
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.21212265888849893
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,0,0.009258666386206945
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,float16,0,0.2416373292605082
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,0.23995200792948404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,float16,0,0.24621333678563437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,0.23676266272862753
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,float16,0,0.18152532974878946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,0.17481066783269247
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,float16,0,0.18172266085942587
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,0.1760480006535848
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,float16,0,0.16874132553736368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.1625226636727651
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,float16,0,0.1684373418490092
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.16249066591262817
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,float16,0,0.18369599183400473
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,0.18236267566680908
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,0.18307733535766602
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,float16,0,0.1525226632754008
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.14812800288200378
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,float16,0,0.15313599507013956
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.14841600259145102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,float16,0,0.1430186629295349
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.13793599605560303
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,float16,0,0.14260799686113992
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.13825066884358725
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,float16,0,0.23846399784088135
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,0.21978666385014853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,float16,0,0.23632532358169556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,float16,0,0.14017599821090698
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,0.21975467602411905
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.1400159994761149
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,float16,0,0.12442133824030559
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.14017599821090698
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,float16,0,0.12434666355450948
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.12284800410270691
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.11335466305414836
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,0.35471999645233154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,float16,0,0.11636799573898315
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.11353600025177002
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,float16,0,0.14877866705258688
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,0.15627732872962952
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,float16,0,0.15606932838757834
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,0.15294399857521057
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,float16,0,0.1086133321126302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.10883200168609619
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,float16,0,0.10946133732795715
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.10941333572069804
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,float16,0,0.09673066933949788
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.09514133135477702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,float16,0,0.09723732868830363
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.0953439970811208
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,float16,0,0.09018133083979289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,float16,0,0.0902933379014333
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.08894933263460796
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,float16,0,0.18370133638381958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,float16,0,0.16082666317621866
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.15628266334533691
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,float16,0,0.1623306671778361
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.15842666228612265
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,float16,0,0.09084266424179077
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.09455999732017517
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,float16,0,0.0906986693541209
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.09434133768081665
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,float16,0,0.0765066643555959
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.07709333300590515
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,float16,0,0.07577066620190938
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.076773335536321
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.12176533540089925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.06858666737874348
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,float16,0,0.06821333368619283
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.0682773341735204
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,float16,0,0.06410666803518932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.06235733131567637
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,float16,0,0.06414400041103363
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.06256533165772755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,float16,0,0.1030506690343221
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.1122773289680481
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,float16,0,0.10478400190671285
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.11210133632024129
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,float16,0,0.07034666836261749
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.07426133255163829
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,float16,0,0.07190933326880138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.07448000212510426
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,float16,0,0.05955733358860016
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.06205866734186808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.062181333700815834
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,float16,0,0.05388799806435903
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.05406400064627329
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.08881066242853801
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.054042667150497437
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,float16,0,0.050197333097457886
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.050240000089009605
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,float16,0,0.05086933573087057
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.0503359983364741
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,float16,0,0.12353600064913432
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.11989866693814595
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,float16,0,0.14171733458836874
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,float16,0,0.06825600067774455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,float16,0,0.06494399905204773
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.11291733384132385
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,float16,0,0.11566932996114095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.07039999961853027
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,float16,0,0.06449600060780843
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,float16,0,0.05041066805521647
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.07034666836261749
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.05429866909980774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,float16,0,0.051445335149765015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.05421333511670431
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,float16,0,0.04377600053946177
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.04572266836961111
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,float16,0,0.04376000165939331
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,float16,0,0.039647998909155525
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.03958933303753535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,float16,0,0.039546666045983635
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.03977599988381068
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,float16,0,0.06043200194835663
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,float16,0,0.03769599894682566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.037530665596326195
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,float16,0,0.03772266705830892
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.037765334049860634
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,float16,0,0.05388266841570536
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,float16,0,0.07666666805744171
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.08638933300971985
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,float16,0,0.07855466504891713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,float16,0,0.051914667089780174
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.08477333188056946
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.05609600245952606
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,float16,0,0.052149335543314614
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,float16,0,0.04158399999141693
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.043935999274253845
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.05607999861240387
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,float16,0,0.04159999887148539
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.043621331453323364
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.03783999880154928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,float16,0,0.03565333286921183
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.03759466608365377
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,float16,0,0.03355200091997782
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.03369066615899404
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,float16,0,0.033770665526390076
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.03345600018898646
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.0313226655125618
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,float16,0,0.03143999973932902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,float16,0,0.12285866340001424
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,float16,0,0.0317493329445521
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.031770666440327965
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,float16,0,0.05204799771308899
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.09422933061917622
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.05946133534113566
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.09974400202433269
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,float16,0,0.0995199978351593
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,float16,0,0.03809066613515218
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,float16,0,0.05228800078233083
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.05877333382765452
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.04190933207670847
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,float16,0,0.03941866755485535
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.04586666822433472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.04204266766707102
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,float16,0,0.03133333226044973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.03363733241955439
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,float16,0,0.02741866558790207
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.027664000789324444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,float16,0,0.027322667340437572
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.027621333797772724
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,float16,0,0.025424001117547352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.025493333737055462
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,float16,0,0.02553066611289978
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.025557334224383037
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.02334933231274287
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,float16,0,0.023743999501069386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.02385066697994868
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,float16,0,0.035461333890755974
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,float16,0,0.09178666273752849
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.08623466889063518
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,float16,0,0.09130133191744487
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.08758399883906047
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,float16,0,0.045696000258127846
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.05301866432030996
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,float16,0,0.03260799994071325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,float16,0,0.04614933331807455
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.03699733316898346
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,float16,0,0.03140799949566523
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,float16,0,0.02536533276240031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,float16,0,0.10037866234779358
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.027280000348885853
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,float16,0,0.025274666647116344
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.02735999971628189
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,float16,0,0.021349333226680756
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.021536000072956085
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,float16,0,0.021194666624069214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.02237333357334137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,float16,0,0.03156266609827677
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,float16,0,0.019130667050679524
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.01952533299724261
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,float16,0,0.019306667149066925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.019744000087181728
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,float16,0,0.018933333456516266
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.01905599981546402
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,float16,0,0.02515200028816859
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,float16,0,0.01860800012946129
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.017258666455745697
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,float16,0,0.018992000569899876
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.019029332945744198
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,float16,0,0.0431573341290156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,float16,0,0.042591998974482216
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.049813335140546165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,float16,0,0.029525332152843475
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.03372266640265783
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,float16,0,0.029370665550231934
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.05328533550103506
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,float16,0,0.023376000424226124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.03783999880154928
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.03350933392842611
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,float16,0,0.0227360005180041
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,float16,0,0.01930133377512296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.019424000134070713
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.025621332228183746
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.019487999379634857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,float16,0,0.017194667210181553
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,float16,0,0.017194667210181553
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.01716800034046173
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,float16,0,0.019253333409627277
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,float16,0,0.015141333142916361
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.015141333142916361
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,float16,0,0.015290666371583939
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.01523200049996376
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,float16,0,0.015103999525308609
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.015072000523408255
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,float16,0,0.015376000354687372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,float16,0,0.015114666273196539
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.01534933348496755
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,float16,0,0.015141333142916361
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.049882665276527405
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,float16,0,0.027327999472618103
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.03143466760714849
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,float16,0,0.02734400083621343
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.03142933299144109
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,float16,0,0.01945066700379054
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.023311999936898548
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,float16,0,0.01941866676012675
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.023365333676338196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,float16,0,0.015317333241303762
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,float16,0,0.01912533367673556
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,float16,0,0.01505600040157636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,float16,0,0.013130666067202887
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.017349333812793095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.015109332899252573
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,float16,0,0.013274667163689932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.014959999670584997
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,float16,0,0.012917333592971167
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,float16,0,0.01312000056107839
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.013280000537633896
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,float16,0,0.011642667154471079
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,float16,0,0.0116799995303154
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.012469333906968435
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,float16,0,0.012954667210578918
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.011861333002646765
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,float16,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.012719999998807907
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,float16,0,0.021253332495689392
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.02521066615978877
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.023210667073726654
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,float16,0,0.01509333277742068
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,float16,0,0.020848001043001812
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.023157333334287006
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,float16,0,0.013189333180586496
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,float16,0,0.013178666432698568
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,float16,0,0.015061333775520325
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.01340266689658165
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,float16,0,0.013167999684810638
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,float16,0,0.013295999417702356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.011258666714032492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,float16,0,0.011152000476916632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.013301332791646322
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,float16,0,0.011055999745925268
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.012576000144084295
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,float16,0,0.011125333607196808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.01129066695769628
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,float16,0,0.011274666835864386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.012015999605258306
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,float16,0,0.01101333275437355
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,float16,0,0.011157333850860596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.011525332927703857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,float16,0,0.011285333583752314
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.011450666934251785
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,float16,0,0.017136000096797943
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.01730666682124138
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.01714133347074191
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,float16,0,0.01349866638580958
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,float16,0,0.017210666090250015
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.01743999992807706
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,float16,0,0.012975999464591345
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.013557333499193192
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.014970666418472925
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.011594666788975397
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,float16,0,0.012970666090647379
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.0124746672809124
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,float16,0,0.011237333218256632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,float16,0,0.011045332998037338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,float16,0,0.011365332951148352
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,float16,0,0.011274666835864386
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.01126933346192042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,float16,0,0.010981333752473196
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,float16,0,0.011215999722480774
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,float16,0,0.011152000476916632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,float16,0,0.011194666226704916
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.01126933346192042
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,float16,0,0.012421333541472753
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.01098666712641716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,float16,0,0.011322667201360067
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.011157333850860596
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,float16,0,0.015013333410024643
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,float16,0,0.01516266663869222
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.01492799942692121
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,float16,0,0.01302933320403099
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,float16,0,0.013242666920026144
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.011946666985750198
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,float16,0,0.011039999624093374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.011061333119869232
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,float16,0,0.011178666104873022
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.011525332927703857
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,float16,0,0.010965333630641302
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.011247999966144562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,float16,0,0.0120319997270902
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.011258666714032492
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,float16,0,0.011125333607196808
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.011717333147923151
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,float16,0,0.01116266722480456
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,float16,0,0.011039999624093374
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,float16,0,0.011312000453472137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,float16,0,0.011312000453472137
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.011168000598748526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.012047999848922094
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,float16,0,0.011168000598748526
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,float16,0,0.01098666712641716
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,0,0.009429333110650381
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,0,0.010954666882753372
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.009162666896979014
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,0,0.011045332998037338
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.010863999525705973
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,0,0.00919999989370505
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.009301333377758661
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,0,0.009173333023985228
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.009141333401203156
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,0,0.008896000062425932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.009242666885256767
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,0,0.012117333710193634
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,0,0.008879999940594038
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.009109333157539368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,float16,0,0.011328000575304031
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,0,0.00895999992887179
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,0,0.008826666822036108
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,0,0.008874666566650072
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.009189333145817121
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,0,0.009008000294367472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.00916800027092298
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,0,0.009008000294367472
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,0,0.009109333157539368
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.14.1,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,0,0.009093333035707474
